# HG changeset patch # User Christian Urban # Date 1371302598 14400 # Node ID 4794759139eac3096af58b589af38e2085661dfb # Parent e85600529ca504eb12b927121719a3f5543f91b4 better organised diff -r e85600529ca5 -r 4794759139ea beamerthemeplainculight.sty --- a/beamerthemeplainculight.sty Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -%%\Providespackage{beamerthemeplainculight}[2003/11/07 ver 0.93] -\NeedsTeXFormat{LaTeX2e}[1995/12/01] - -% Copyright 2003 by Till Tantau . -% -% This program can be redistributed and/or modified under the terms -% of the LaTeX Project Public License Distributed from CTAN -% archives in directory macros/latex/base/lppl.txt. - -\newcommand{\slidecaption}{} - -\mode - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% comic fonts fonts -\DeclareFontFamily{T1}{comic}{}% -\DeclareFontShape{T1}{comic}{m}{n}{<->s*[.9]comic8t}{}% -\DeclareFontShape{T1}{comic}{m}{it}{<->s*[.9]comic8t}{}% -\DeclareFontShape{T1}{comic}{m}{sc}{<->s*[.9]comic8t}{}% -\DeclareFontShape{T1}{comic}{b}{n}{<->s*[.9]comicbd8t}{}% -\DeclareFontShape{T1}{comic}{b}{it}{<->s*[.9]comicbd8t}{}% -\DeclareFontShape{T1}{comic}{m}{sl}{<->ssub * comic/m/it}{}% -\DeclareFontShape{T1}{comic}{b}{sc}{<->sub * comic/m/sc}{}% -\DeclareFontShape{T1}{comic}{b}{sl}{<->ssub * comic/b/it}{}% -\DeclareFontShape{T1}{comic}{bx}{n}{<->ssub * comic/b/n}{}% -\DeclareFontShape{T1}{comic}{bx}{it}{<->ssub * comic/b/it}{}% -\DeclareFontShape{T1}{comic}{bx}{sc}{<->sub * comic/m/sc}{}% -\DeclareFontShape{T1}{comic}{bx}{sl}{<->ssub * comic/b/it}{}% -% -\renewcommand{\rmdefault}{comic}% -\renewcommand{\sfdefault}{comic}% -\renewcommand{\mathfamilydefault}{cmr}% mathfont should be still the old one -% -\DeclareMathVersion{bold}% mathfont needs to be bold -\DeclareSymbolFont{operators}{OT1}{cmr}{b}{n}% -\SetSymbolFont{operators}{bold}{OT1}{cmr}{b}{n}% -\DeclareSymbolFont{letters}{OML}{cmm}{b}{it}% -\SetSymbolFont{letters}{bold}{OML}{cmm}{b}{it}% -\DeclareSymbolFont{symbols}{OMS}{cmsy}{b}{n}% -\SetSymbolFont{symbols}{bold}{OMS}{cmsy}{b}{n}% -\DeclareSymbolFont{largesymbols}{OMX}{cmex}{b}{n}% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Frametitles -\setbeamerfont{frametitle}{size={\LARGE}} -\setbeamerfont{frametitle}{family={\usefont{T1}{ptm}{b}{n}}} -\setbeamercolor{frametitle}{fg=ProcessBlue,bg=white} - -\setbeamertemplate{frametitle}{% -\vskip 2mm % distance from the top margin -\hskip -3mm % distance from left margin -\vbox{% -\begin{minipage}{1.05\textwidth}% -\centering% -\insertframetitle% -\end{minipage}}% -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Foot -% -\setbeamertemplate{navigation symbols}{} -\usefoottemplate{% -\vbox{% - \tinyline{% - \tiny\hfill\textcolor{gray!50}{\slidecaption{} -- - p.~\insertframenumber/\inserttotalframenumber}}}% -} - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\beamertemplateballitem -\setlength\leftmargini{2mm} -\setlength\leftmarginii{0.6cm} -\setlength\leftmarginiii{1.5cm} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% blocks -%\definecolor{cream}{rgb}{1,1,.65} -\definecolor{cream}{rgb}{1,1,.8} -\setbeamerfont{block title}{size=\normalsize} -\setbeamercolor{block title}{fg=black,bg=cream} -\setbeamercolor{block body}{fg=black,bg=cream} - 
-\setbeamertemplate{blocks}[rounded][shadow=true] - -\setbeamercolor{boxcolor}{fg=black,bg=cream} - -\mode - - - - - - - diff -r e85600529ca5 -r 4794759139ea fib.while --- a/fib.while Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -/* Fibonacci Program - input: n -*/ - -n := 19; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write minus2 - diff -r e85600529ca5 -r 4794759139ea hw/hw01.pdf Binary file hw/hw01.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw01.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,42 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} + +\begin{document} + +\section*{Homework 1} + +\begin{enumerate} +\item {\bf (Optional)} If you want to run the code presented +in the lectures, install the +Scala programming language available (for free) from +\begin{center} +\url{http://www.scala-lang.org} +\end{center} + +\item {\bf (Optional)} Have a look at the crawler programs. +Can you find a usage for them in your daily programming life? + +\item In the context of the AFL-course, what is meant by the term \emph{language}? + +\item Give the definition for regular expressions. What is the meaning of a +regular expression? + +\item Assume the concatenation operation of two strings is written as $s_1 @ s_2$. +Define the operation of \emph{concatenating} two sets of strings. + +\item How is the power of a language defined? (Hint: There are two rules, one for $\_\!\_^0$ and +one for $\_\!\_^{n+1}$.) + +\item Given the regular expressions $r_1 = \epsilon$ and $r_2 = \varnothing$ and $r_3 = a$. +How many strings can the regular expressions $r_1^*$, $r_2^*$ and $r_3^*$ each match? + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw02.pdf Binary file hw/hw02.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw02.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,36 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\begin{document} + +\section*{Homework 2} + +\begin{enumerate} +\item Give regular expressions for (a) decimal numbers and for (b) binary numbers. +(Hint: Observe that the empty string is not a number. Also observe that leading 0s +are normally not written.) + +\item Decide whether the following two regular expressions are equivalent $(\epsilon + a)^* \equiv^? a^*$ and +$(a \cdot b)^* \cdot a \equiv^? a \cdot (b \cdot a)^*$. + +\item Given the regular expression $r = (a \cdot b + b)^*$. Compute what the derivative of $r$ is with respect to +$a$ and $b$. Is $r$ nullable? + +\item What is a regular language? 
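As an aside to the derivative exercise above: the answers can be cross-checked mechanically with the Scala definitions that appear later in this changeset (the Rexp datatype in app51.scala, nullable in app5.scala, deriv in app6.scala). A minimal sketch, assuming those three files are loaded:

// (a.b + b)* built from the Rexp constructors of app51.scala
val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))

println(deriv(r, 'a'))   // derivative of r with respect to a
println(deriv(r, 'b'))   // derivative of r with respect to b
println(nullable(r))     // true, since a star always matches the empty string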
+ +\item Prove that for all regular expressions $r$ we have +\begin{center} +$\text{nullable}(r)$ \quad if and only if \quad $\texttt{""} \in L(r)$ +\end{center} + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw03.pdf Binary file hw/hw03.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw03.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,49 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\begin{document} + +\section*{Homework 3} + +\begin{enumerate} +\item Assume you have an alphabet consisting of the letters $a$, $b$ and $c$ only. +(a) Find a regular expression that recognises the two strings $ab$ and $ac$. (b) +Find a regular expression that matches all strings \emph{except} these two strings. +Note, you can only use regular expressions of the form +\begin{center} +$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ +\end{center} + +\item Define the function $zeroable$ which takes a regular expression as argument +and returns a boolean.\footnote{In an earlier version there was an error.} The +function should satisfy the following property: +\begin{center} +$zeroable(r)$ \;if and only if\; $L(r) = \varnothing$ +\end{center} + +\item Define the tokens and regular expressions for a language +consisting of numbers, left-parenthesis (, right-parenthesis ), +identifiers and the operations $+$, $-$ and $*$. Can the following strings +in this language be lexed? + +\begin{itemize} +\item \texttt{"}$(a + 3) * b$\texttt{"} +\item \texttt{"}$)()++ -33$\texttt{"} +\item \texttt{"}$(a / 3) * 3$\texttt{"} +\end{itemize} + + +In case they can, can you give the corresponding token sequences. +\end{enumerate} + + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw04.pdf Binary file hw/hw04.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw04.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw04.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,109 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 4} + +\begin{enumerate} +\item Why is every finite set of strings a regular language? + +\item What is the language recognised by the regular expressions $(\varnothing^*)^*$. + +\item If a regular expression $r$ does not contain any occurrence of $\varnothing$ +is it possible for $L(r)$ to be empty? + +\item Assume that $s^{-1}$ stands for the operation of reversing a +string $s$. Given the following \emph{reversing} function on regular +expressions + +\begin{center} +\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} +$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ +$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ +$rev(c)$ & $\dn$ & $c$\\ +$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ +$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ +$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ +\end{tabular} +\end{center} + + +and the set + +\begin{center} +$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$ +\end{center} + +prove whether + +\begin{center} +$L(rev(r)) = Rev (L(r))$ +\end{center} + +holds. 
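As an aside: the rev equations above transcribe directly into Scala over the Rexp datatype defined in app51.scala later in this changeset. A sketch, assuming that datatype is in scope, which can be used to test the claim on small examples before attempting the proof:

// a direct transcription of the rev equations above
def rev(r: Rexp) : Rexp = r match {
  case NULL        => NULL
  case EMPTY       => EMPTY
  case CHAR(c)     => CHAR(c)
  case ALT(r1, r2) => ALT(rev(r1), rev(r2))
  case SEQ(r1, r2) => SEQ(rev(r2), rev(r1))
  case STAR(r1)    => STAR(rev(r1))
}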
+ +\item Give a regular expression over the alphabet $\{a,b\}$ recognising all strings +that do not contain any substring $bb$ and end in $a$. + +\item Assume the delimiters for comments are \texttt{$\slash$*} and \texttt{*$\slash$}. +Give a regular expression that can recognise comments +of the form + +\begin{center} +\texttt{$\slash$*~\ldots{}~*$\slash$} +\end{center} + +where the three dots stand for arbitrary characters, but not comment delimiters. +(Hint: You can assume you are already given a regular expression written \texttt{ALL}, +that can recognise any character, and a regular expression \texttt{NOT} that recognises +the complement of a regular expression.) + +\item Given the alphabet $\{a,b\}$. Draw the automaton that has two states, say $q_0$ and $q_1$. +The starting state is $q_0$ and the final state is $q_1$. The transition +function is given by + +\begin{center} +\begin{tabular}{l} +$(q_0, a) \rightarrow q_0$\\ +$(q_0, b) \rightarrow q_1$\\ +$(q_1, b) \rightarrow q_1$ +\end{tabular} +\end{center} + +What is the languages recognised by this automaton? + +\item Give a deterministic finite automaton that can recognise +the language $L(a^*\cdot b\cdot b^*)$. + + +\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as +argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so +that it filters out all comments and whitespace from the result. + +\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it +implements the \texttt{findAll} function. This function takes a regular +expressions and a string, and returns all substrings in this string that +match the regular expression. +\end{enumerate} + +% explain what is a context-free grammar and the language it generates +% +% +% Define the language L(M) accepted by a deterministic finite automaton M. +% +% +% does (a + b)*b+ and (a*b+) + (b*b+) define the same language + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw05.pdf Binary file hw/hw05.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw05.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw05.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,116 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 5} + +\begin{enumerate} +\item Define the following regular expressions + +\begin{center} +\begin{tabular}{ll} +$r^+$ & (one or more matches)\\ +$r^?$ & (zero or one match)\\ +$r^{\{n\}}$ & (exactly $n$ matches)\\ +$r^{\{m, n\}}$ & (at least $m$ and maximal $n$ matches, with the\\ +& \phantom{(}assumption $m \le n$)\\ +\end{tabular} +\end{center} + +in terms of the usual regular expressions + +\begin{center} +$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ +\end{center} + +\item Given a deterministic finite automata $A(Q, q_0, F, \delta)$, +define which language is recognised by this automaton. + +\item Given the following deterministic finite automata over the alphabet +$\{a, b\}$, find an automaton that recognises the complement language. +(Hint: Recall that for the algorithm from the lectures, the automaton needs to be +in completed form, that is have a transition for every letter from the alphabet.) 
+ +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \path[->] (q0) edge node[above] {$a$} (q1) + (q1) edge [loop right] node {$b$} () + ; +\end{tikzpicture} +\end{center} + +\item Given the following deterministic finite automaton + +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state,accepting] (q1) at ( 1,1) {$q_1$}; + \node[state, accepting] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge node[above] {$b$} (q1) + (q1) edge [loop above] node[above] {$a$} () + (q2) edge [loop above] node[above] {$a, b$} () + (q1) edge node[above] {$b$} (q2) + (q0) edge[bend right] node[below] {$a$} (q2) + ; +\end{tikzpicture} +\end{center} +find the corresponding minimal automaton. State clearly which nodes +can be merged. + +\item Given the following non-deterministic finite automaton over the alphabet $\{a, b\}$, +find a deterministic finite automaton that recognises the same language: + +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state] (q1) at ( 1,1) {$q_1$}; + \node[state, accepting] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge node[above] {$a$} (q1) + (q0) edge [loop above] node[above] {$b$} () + (q0) edge [loop below] node[below] {$a$} () + (q1) edge node[above] {$a$} (q2) + ; +\end{tikzpicture} +\end{center} + +\item +Given the following finite deterministic automaton over the alphabet $\{a, b\}$: + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \node[state] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge[bend left] node[above] {$a$} (q1) + (q1) edge[bend left] node[above] {$b$} (q0) + (q2) edge[bend left=50] node[below] {$b$} (q0) + (q1) edge node[above] {$a$} (q2) + (q2) edge [loop right] node {$a$} () + (q0) edge [loop below] node {$b$} () + ; +\end{tikzpicture} +\end{center} + +Give a regular expression that can recognise the same language as +this automaton. (Hint: If you use Brzozwski's method, you can assume +Arden's lemma which states that an equation of the form $q = q\cdot r + s$ +has the unique solution $q = s \cdot r^*$.)\ +\end{enumerate} + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw06.pdf Binary file hw/hw06.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw06.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw06.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,72 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 6} + +\begin{enumerate} +\item (i) Give the regular expressions for lexing a language +consisting of whitespaces, identifiers (some letters followed by digits), numbers, +operations \texttt{=}, \texttt{<} and \texttt{>}, and the keywords +\texttt{if}, \texttt{then} and \texttt{else}. +(ii) Decide whether the following strings +can be lexed in this language? 
+ +\begin{enumerate} +\item \texttt{"if y4 = 3 then 1 else 3"} +\item \texttt{"if33 ifif then then23 else else 32"} +\item \texttt{"if x4x < 33 then 1 else 3"} +\end{enumerate} + +In case they can, give the corresponding token sequences. (Hint: +Observe the maximal munch rule and priorities of your regular +expressions that make the process of lexing unambiguous.) + +\item Suppose the grammar + +\begin{center} +\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F \;|\; F \cdot \backslash \cdot F$\\ +$F$ & $\rightarrow$ & $T \;|\; T \cdot \texttt{+} \cdot T \;|\; T \cdot \texttt{-} \cdot T$\\ +$T$ & $\rightarrow$ & $num \;|\; \texttt{(} \cdot E \cdot \texttt{)}$\\ +\end{tabular} +\end{center} + +where $E$, $F$ and $T$ are non-terminals, $E$ is the starting symbol of the grammar, and $num$ stands for +a number token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. + +\item Define what it means for a grammar to be ambiguous. Give an example of +an ambiguous grammar. + +\item Suppose boolean expressions are built up from + +\begin{center} +\begin{tabular}{ll} +1.) & tokens for \texttt{true} and \texttt{false},\\ +2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\ +3.) & the prefix operation $\neg$, and\\ +4.) & can be enclosed in parentheses. +\end{tabular} +\end{center} + +(i) Give a grammar that can recognise such boolean expressions +and (ii) give a sample string involving all rules given in 1.-4.~that +can be parsed by this grammar. + + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw07.pdf Binary file hw/hw07.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw07.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw07.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,75 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 7} + +\begin{enumerate} +\item Suppose the following finite deterministic automaton over the alphabet $\{0, 1\}$. + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \node[state] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge[bend left] node[above] {$0$} (q1) + (q1) edge[bend left] node[above] {$1$} (q0) + (q2) edge[bend left=50] node[below] {$1$} (q0) + (q1) edge node[above] {$0$} (q2) + (q2) edge [loop right] node {$0$} () + (q0) edge [loop below] node {$1$} () + ; +\end{tikzpicture} +\end{center} + +Give a regular expression that can recognise the same language as +this automaton. (Hint: If you use Brzozwski's method, you can assume +Arden's lemma which states that an equation of the form $q = q\cdot r + s$ +has the unique solution $q = s \cdot r^*$.) + +\item Consider the following grammar + +\begin{center} +\begin{tabular}{l} +$S \rightarrow N\cdot P$\\ +$P \rightarrow V\cdot N$\\ +$N \rightarrow N\cdot N$\\ +$N \rightarrow A \cdot N$\\ +$N \rightarrow \texttt{student} \;|\; \texttt{trainer} \;|\; \texttt{team} \;|\; \texttt{trains}$\\ +$V \rightarrow \texttt{trains} \;|\; \texttt{team}$\\ +$A \rightarrow \texttt{The} \;|\; \texttt{the}$\\ +\end{tabular} +\end{center} + +where $S$ is the start symbol and $S$, $P$, $N$, $V$ and $A$ are non-terminals. 
+Using the CYK-algorithm, check whether or not the following string can be parsed +by the grammar: + +\begin{center} +\texttt{The trainer trains the student team} +\end{center} + +\item {\bf (Optional)} The task is to match strings where the letters are in alphabetical order---for example, +\texttt{abcfjz} would pass, but \texttt{acb} would not. Whitespace should be ignored---for example +\texttt{ab c d} should pass. The point is to try to get the regular expression as short as possible! +See: + +\begin{center} +\url{http://callumacrae.github.com/regex-tuesday/challenge11.html} +\end{center} +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/hw08.pdf Binary file hw/hw08.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/hw08.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/hw08.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,52 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 8} + +\begin{enumerate} +\item Suppose the following grammar for the WHILE-language: + +\begin{center} +\begin{tabular}{lcl} +$Stmt$ & $\rightarrow$ & $\text{skip}$\\ + & $|$ & $Id := AExp$\\ + & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ + & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ +$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ + & $|$ & $Stmt$\medskip\\ +$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ + & $|$ & $Stmt$\medskip\\ +$AExp$ & $\rightarrow$ & $AExp + AExp$\\ + & $|$ & $AExp * AExp$\\ + & $|$ & $( AExp )$\\ + & $|$ & $Num$\\ + & $|$ & $Id$\medskip\\ +$BExp$ & $\rightarrow$ & $AExp = AExp$\\ + & $|$ & $AExp \not= AExp$\\ + & $|$ & $\text{false}$\\ + & $|$ & $\text{true}$\\ + +\end{tabular} +\end{center} + +Transform this grammar into Chomsky normalform. + +\item Write a program in the WHILE-language that calculates the factorial function. 
+ +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw/proof.pdf Binary file hw/proof.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw/proof.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hw/proof.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,210 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions +\begin{document} + +\section*{Proof} + +Recall the definitions for regular expressions and the language associated with a regular expression: + +\begin{center} +\begin{tabular}{c} +\begin{tabular}[t]{rcl} + $r$ & $::=$ & $\varnothing$ \\ + & $\mid$ & $\epsilon$ \\ + & $\mid$ & $c$ \\ + & $\mid$ & $r_1 \cdot r_2$ \\ + & $\mid$ & $r_1 + r_2$ \\ + & $\mid$ & $r^*$ \\ + \end{tabular}\hspace{10mm} +\begin{tabular}[t]{r@{\hspace{1mm}}c@{\hspace{1mm}}l} +$L(\varnothing)$ & $\dn$ & $\varnothing$ \\ +$L(\epsilon)$ & $\dn$ & $\{\texttt{""}\}$ \\ +$L(c)$ & $\dn$ & $\{\texttt{"}c\texttt{"}\}$ \\ +$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ \\ +$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ \\ + $L(r^*)$ & $\dn$ & $\bigcup_{n\ge 0} L(r)^n$ \\ + \end{tabular} +\end{tabular} +\end{center} + +\noindent +We also defined the notion of a derivative of a regular expression (the derivative with respect to a character): + +\begin{center} +\begin{tabular}{lcl} + $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$ \\ + $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\ + $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\ + $der\, c\, (r_1 + r_2)$ & $\dn$ & $(der\, c\, r_1) + (der\, c\, r_2)$ \\ + $der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable(r_1)$\\ + & & then $((der\, c\, r_1) \cdot r_2) + (der\, c\, r_2)$\\ + & & else $(der\, c\, r_1) \cdot r_2$\\ + $der\, c\, (r^*)$ & $\dn$ & $(der\, c\, r) \cdot (r^*)$\\ + \end{tabular} +\end{center} + +\noindent +With our definition of regular expressions comes an induction principle. Given a property $P$ over +regular expressions. We can establish that $\forall r.\; P(r)$ holds, provided we can show the following: + +\begin{enumerate} +\item $P(\varnothing)$, $P(\epsilon)$ and $P(c)$ all hold, +\item $P(r_1 + r_2)$ holds under the induction hypotheses that +$P(r_1)$ and $P(r_2)$ hold, +\item $P(r_1 \cdot r_2)$ holds under the induction hypotheses that +$P(r_1)$ and $P(r_2)$ hold, and +\item $P(r^*)$ holds under the induction hypothesis that $P(r)$ holds. +\end{enumerate} + +\noindent +Let us try out an induction proof. Recall the definition + +\begin{center} +$Der\, c\, A \dn \{ s\;\mid\; c\!::\!s \in A\}$ +\end{center} + +\noindent +whereby $A$ is a set of strings. We like to prove + +\begin{center} +\begin{tabular}{l} +$P(r) \dn $ \hspace{4mm} $L(der\,c\,r) = Der\,c\,(L(r))$ +\end{tabular} +\end{center} + +\noindent +by induction over the regular expression $r$. + + +\newpage +\noindent +{\bf Proof} + +\noindent +According to 1.~above we need to prove $P(\varnothing)$, $P(\epsilon)$ and $P(d)$. Lets do this in turn. + +\begin{itemize} +\item First Case: $P(\varnothing)$ is $L(der\,c\,\varnothing) = Der\,c\,(L(\varnothing))$ (a). We have $der\,c\,\varnothing = \varnothing$ +and $L(\varnothing) = \varnothing$. We also have $Der\,c\,\varnothing = \varnothing$. Hence we have $\varnothing = \varnothing$ in (a). + +\item Second Case: $P(\epsilon)$ is $L(der\,c\,\epsilon) = Der\,c\,(L(\epsilon))$ (b). 
We have $der\,c\,\epsilon = \varnothing$, +$L(\varnothing) = \varnothing$ and $L(\epsilon) = \{\texttt{""}\}$. We also have $Der\,c\,\{\texttt{""}\} = \varnothing$. Hence we have +$\varnothing = \varnothing$ in (b). + +\item Third Case: $P(d)$ is $L(der\,c\,d) = Der\,c\,(L(d))$ (c). We need to treat the cases $d = c$ and $d \not= c$. + +$d = c$: We have $der\,c\,c = \epsilon$ and $L(\epsilon) = \{\texttt{""}\}$. +We also have $L(c) = \{\texttt{"}c\texttt{"}\}$ and $Der\,c\,\{\texttt{"}c\texttt{"}\} = \{\texttt{""}\}$. Hence we have +$\{\texttt{""}\} = \{\texttt{""}\}$ in (c). + +$d \not=c$: We have $der\,c\,d = \varnothing$. +We also have $Der\,c\,\{\texttt{"}d\texttt{"}\} = \varnothing$. Hence we have +$\varnothing = \varnothing$ in (c). +\end{itemize} + +\noindent +These were the easy base cases. Now come the inductive cases. + +\begin{itemize} +\item Fourth Case: $P(r_1 + r_2)$ is $L(der\,c\,(r_1 + r_2)) = Der\,c\,(L(r_1 + r_2))$ (d). This is what we have to show. +We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ +$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) +\end{tabular} +\end{center} + +We have that $der\,c\,(r_1 + r_2) = (der\,c\,r_1) + (der\,c\,r_2)$ and also $L((der\,c\,r_1) + (der\,c\,r_2)) = L(der\,c\,r_1) \cup L(der\,c\,r_2)$. +By (I) and (II) we know that the left-hand side is $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$. You need to ponder a bit, but you should see +that + +\begin{center} +$Der\,c(A \cup B) = (Der\,c\,A) \cup (Der\,c\,B)$ +\end{center} + +holds for every set of strings $A$ and $B$. That means the right-hand side of (d) is also $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$, +because $L(r_1 + r_2) = L(r_1) \cup L(r_2)$. And we are done with the fourth case. + +\item Fifth Case: $P(r_1 \cdot r_2)$ is $L(der\,c\,(r_1 \cdot r_2)) = Der\,c\,(L(r_1 \cdot r_2))$ (e). We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ +$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) +\end{tabular} +\end{center} + +Let us first consider the case where $nullable(r_1)$ holds. Then + +\[ +der\,c\,(r_1 \cdot r_2) = ((der\,c\,r_1) \cdot r_2) + (der\,c\,r_2). +\] + +The corresponding language of the right-hand side is + +\[ +(L(der\,c\,r_1) \,@\, L(r_2)) \cup L(der\,c\,r_2). +\] + +By the induction hypotheses (I) and (II), this is equal to + +\[ +(Der\,c\,(L(r_1)) \,@\, L(r_2)) \cup (Der\,c\,(L(r_2)).\;\;(**) +\] + +We also know that $L(r_1 \cdot r_2) = L(r_1) \,@\,L(r_2)$. We have to know what +$Der\,c\,(L(r_1) \,@\,L(r_2))$ is. + +Let us analyse what +$Der\,c\,(A \,@\, B)$ is for arbitrary sets of strings $A$ and $B$. If $A$ does \emph{not} +contain the empty string, then every string in $A\,@\,B$ is of the form $s_1 \,@\, s_2$ where +$s_1 \in A$ and $s_2 \in B$. So if $s_1$ starts with $c$ then we just have to remove it. Consequently, +$Der\,c\,(A \,@\, B) = (Der\,c\,(A)) \,@\, B$. This case does not apply here though, because we already +proved that if $r_1$ is nullable, then $L(r_1)$ contains the empty string. In this case, every string +in $A\,@\,B$ is either of the form $s_1 \,@\, s_2$, with $s_1 \in A$ and $s_2 \in B$, or +$s_3$ with $s_3 \in B$. This means $Der\,c\,(A \,@\, B) = ((Der\,c\,(A)) \,@\, B) \cup Der\,c\,B$. +But this proves that (**) is $Der\,c\,(L(r_1) \,@\, L(r_2))$. + +Similarly in the case where $r_1$ is \emph{not} nullable. + +\item Sixth Case: $P(r^*)$ is $L(der\,c\,(r^*)) = Der\,c\,L(r^*)$. 
We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r)$: & $L(der\,c\,r) = Der\,c\,(L(r))$ (I) +\end{tabular} +\end{center} + +We have $der\,c\,(r^*) = der\,c\,r\cdot r^*$. Which means $L(der\,c\,(r^*)) = L(der\,c\,r\cdot r^*)$ and +further $L(der\,c\,r) \,@\, L(r^*)$. By induction hypothesis (I) we know that is equal to +$(Der\,c\,L(r)) \,@\, L(r^*)$. (*) + +\end{itemize} + + + + +Let us now analyse $Der\,c\,L(r^*)$, which is equal to $Der\,c\,((L(r))^*)$. Now $(L(r))^*$ is defined +as $\bigcup_{n \ge 0} L(r)$. We can write this as $L(r)^0 \cup \bigcup_{n \ge 1} L(r)$, where we just +separated the first union and then let the ``big-union'' start from $1$. Form this we can already infer + +\begin{center} +$Der\,c\,(L(r^*)) = Der\,c\,(L(r)^0 \cup \bigcup_{n \ge 1} L(r)) = (Der\,c\,L(r)^0) \cup Der\,c\,(\bigcup_{n \ge 1} L(r))$ +\end{center} + +The first union ``disappears'' since $Der\,c\,(L(r)^0) = \varnothing$. + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r e85600529ca5 -r 4794759139ea hw01.pdf Binary file hw01.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw01.tex --- a/hw01.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} - -\begin{document} - -\section*{Homework 1} - -\begin{enumerate} -\item {\bf (Optional)} If you want to run the code presented -in the lectures, install the -Scala programming language available (for free) from -\begin{center} -\url{http://www.scala-lang.org} -\end{center} - -\item {\bf (Optional)} Have a look at the crawler programs. -Can you find a usage for them in your daily programming life? - -\item In the context of the AFL-course, what is meant by the term \emph{language}? - -\item Give the definition for regular expressions. What is the meaning of a -regular expression? - -\item Assume the concatenation operation of two strings is written as $s_1 @ s_2$. -Define the operation of \emph{concatenating} two sets of strings. - -\item How is the power of a language defined? (Hint: There are two rules, one for $\_\!\_^0$ and -one for $\_\!\_^{n+1}$.) - -\item Given the regular expressions $r_1 = \epsilon$ and $r_2 = \varnothing$ and $r_3 = a$. -How many strings can the regular expressions $r_1^*$, $r_2^*$ and $r_3^*$ each match? - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw02.pdf Binary file hw02.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw02.tex --- a/hw02.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\begin{document} - -\section*{Homework 2} - -\begin{enumerate} -\item Give regular expressions for (a) decimal numbers and for (b) binary numbers. -(Hint: Observe that the empty string is not a number. Also observe that leading 0s -are normally not written.) - -\item Decide whether the following two regular expressions are equivalent $(\epsilon + a)^* \equiv^? a^*$ and -$(a \cdot b)^* \cdot a \equiv^? a \cdot (b \cdot a)^*$. - -\item Given the regular expression $r = (a \cdot b + b)^*$. Compute what the derivative of $r$ is with respect to -$a$ and $b$. Is $r$ nullable? - -\item What is a regular language? 
- -\item Prove that for all regular expressions $r$ we have -\begin{center} -$\text{nullable}(r)$ \quad if and only if \quad $\texttt{""} \in L(r)$ -\end{center} - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw03.pdf Binary file hw03.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw03.tex --- a/hw03.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\begin{document} - -\section*{Homework 3} - -\begin{enumerate} -\item Assume you have an alphabet consisting of the letters $a$, $b$ and $c$ only. -(a) Find a regular expression that recognises the two strings $ab$ and $ac$. (b) -Find a regular expression that matches all strings \emph{except} these two strings. -Note, you can only use regular expressions of the form -\begin{center} -$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ -\end{center} - -\item Define the function $zeroable$ which takes a regular expression as argument -and returns a boolean.\footnote{In an earlier version there was an error.} The -function should satisfy the following property: -\begin{center} -$zeroable(r)$ \;if and only if\; $L(r) = \varnothing$ -\end{center} - -\item Define the tokens and regular expressions for a language -consisting of numbers, left-parenthesis (, right-parenthesis ), -identifiers and the operations $+$, $-$ and $*$. Can the following strings -in this language be lexed? - -\begin{itemize} -\item \texttt{"}$(a + 3) * b$\texttt{"} -\item \texttt{"}$)()++ -33$\texttt{"} -\item \texttt{"}$(a / 3) * 3$\texttt{"} -\end{itemize} - - -In case they can, can you give the corresponding token sequences. -\end{enumerate} - - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw04.pdf Binary file hw04.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw04.tex --- a/hw04.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 4} - -\begin{enumerate} -\item Why is every finite set of strings a regular language? - -\item What is the language recognised by the regular expressions $(\varnothing^*)^*$. - -\item If a regular expression $r$ does not contain any occurrence of $\varnothing$ -is it possible for $L(r)$ to be empty? - -\item Assume that $s^{-1}$ stands for the operation of reversing a -string $s$. Given the following \emph{reversing} function on regular -expressions - -\begin{center} -\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} -$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ -$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ -$rev(c)$ & $\dn$ & $c$\\ -$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ -$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ -$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ -\end{tabular} -\end{center} - - -and the set - -\begin{center} -$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$ -\end{center} - -prove whether - -\begin{center} -$L(rev(r)) = Rev (L(r))$ -\end{center} - -holds. 
- -\item Give a regular expression over the alphabet $\{a,b\}$ recognising all strings -that do not contain any substring $bb$ and end in $a$. - -\item Assume the delimiters for comments are \texttt{$\slash$*} and \texttt{*$\slash$}. -Give a regular expression that can recognise comments -of the form - -\begin{center} -\texttt{$\slash$*~\ldots{}~*$\slash$} -\end{center} - -where the three dots stand for arbitrary characters, but not comment delimiters. -(Hint: You can assume you are already given a regular expression written \texttt{ALL}, -that can recognise any character, and a regular expression \texttt{NOT} that recognises -the complement of a regular expression.) - -\item Given the alphabet $\{a,b\}$. Draw the automaton that has two states, say $q_0$ and $q_1$. -The starting state is $q_0$ and the final state is $q_1$. The transition -function is given by - -\begin{center} -\begin{tabular}{l} -$(q_0, a) \rightarrow q_0$\\ -$(q_0, b) \rightarrow q_1$\\ -$(q_1, b) \rightarrow q_1$ -\end{tabular} -\end{center} - -What is the languages recognised by this automaton? - -\item Give a deterministic finite automaton that can recognise -the language $L(a^*\cdot b\cdot b^*)$. - - -\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as -argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so -that it filters out all comments and whitespace from the result. - -\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it -implements the \texttt{findAll} function. This function takes a regular -expressions and a string, and returns all substrings in this string that -match the regular expression. -\end{enumerate} - -% explain what is a context-free grammar and the language it generates -% -% -% Define the language L(M) accepted by a deterministic finite automaton M. -% -% -% does (a + b)*b+ and (a*b+) + (b*b+) define the same language - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw05.pdf Binary file hw05.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw06.pdf Binary file hw06.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw06.tex --- a/hw06.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 6} - -\begin{enumerate} -\item (i) Give the regular expressions for lexing a language -consisting of whitespaces, identifiers (some letters followed by digits), numbers, -operations \texttt{=}, \texttt{<} and \texttt{>}, and the keywords -\texttt{if}, \texttt{then} and \texttt{else}. -(ii) Decide whether the following strings -can be lexed in this language? - -\begin{enumerate} -\item \texttt{"if y4 = 3 then 1 else 3"} -\item \texttt{"if33 ifif then then23 else else 32"} -\item \texttt{"if x4x < 33 then 1 else 3"} -\end{enumerate} - -In case they can, give the corresponding token sequences. (Hint: -Observe the maximal munch rule and priorities of your regular -expressions that make the process of lexing unambiguous.) 
- -\item Suppose the grammar - -\begin{center} -\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F \;|\; F \cdot \backslash \cdot F$\\ -$F$ & $\rightarrow$ & $T \;|\; T \cdot \texttt{+} \cdot T \;|\; T \cdot \texttt{-} \cdot T$\\ -$T$ & $\rightarrow$ & $num \;|\; \texttt{(} \cdot E \cdot \texttt{)}$\\ -\end{tabular} -\end{center} - -where $E$, $F$ and $T$ are non-terminals, $E$ is the starting symbol of the grammar, and $num$ stands for -a number token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. - -\item Define what it means for a grammar to be ambiguous. Give an example of -an ambiguous grammar. - -\item Suppose boolean expressions are built up from - -\begin{center} -\begin{tabular}{ll} -1.) & tokens for \texttt{true} and \texttt{false},\\ -2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\ -3.) & the prefix operation $\neg$, and\\ -4.) & can be enclosed in parentheses. -\end{tabular} -\end{center} - -(i) Give a grammar that can recognise such boolean expressions -and (ii) give a sample string involving all rules given in 1.-4.~that -can be parsed by this grammar. - - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw07.pdf Binary file hw07.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw07.tex --- a/hw07.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 7} - -\begin{enumerate} -\item Suppose the following finite deterministic automaton over the alphabet $\{0, 1\}$. - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; - \node[state, accepting] (q1) at ( 1,1) {$q_1$}; - \node[state] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge[bend left] node[above] {$0$} (q1) - (q1) edge[bend left] node[above] {$1$} (q0) - (q2) edge[bend left=50] node[below] {$1$} (q0) - (q1) edge node[above] {$0$} (q2) - (q2) edge [loop right] node {$0$} () - (q0) edge [loop below] node {$1$} () - ; -\end{tikzpicture} -\end{center} - -Give a regular expression that can recognise the same language as -this automaton. (Hint: If you use Brzozwski's method, you can assume -Arden's lemma which states that an equation of the form $q = q\cdot r + s$ -has the unique solution $q = s \cdot r^*$.) - -\item Consider the following grammar - -\begin{center} -\begin{tabular}{l} -$S \rightarrow N\cdot P$\\ -$P \rightarrow V\cdot N$\\ -$N \rightarrow N\cdot N$\\ -$N \rightarrow A \cdot N$\\ -$N \rightarrow \texttt{student} \;|\; \texttt{trainer} \;|\; \texttt{team} \;|\; \texttt{trains}$\\ -$V \rightarrow \texttt{trains} \;|\; \texttt{team}$\\ -$A \rightarrow \texttt{The} \;|\; \texttt{the}$\\ -\end{tabular} -\end{center} - -where $S$ is the start symbol and $S$, $P$, $N$, $V$ and $A$ are non-terminals. -Using the CYK-algorithm, check whether or not the following string can be parsed -by the grammar: - -\begin{center} -\texttt{The trainer trains the student team} -\end{center} - -\item {\bf (Optional)} The task is to match strings where the letters are in alphabetical order---for example, -\texttt{abcfjz} would pass, but \texttt{acb} would not. 
Whitespace should be ignored---for example -\texttt{ab c d} should pass. The point is to try to get the regular expression as short as possible! -See: - -\begin{center} -\url{http://callumacrae.github.com/regex-tuesday/challenge11.html} -\end{center} -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea hw08.pdf Binary file hw08.pdf has changed diff -r e85600529ca5 -r 4794759139ea hw08.tex --- a/hw08.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 8} - -\begin{enumerate} -\item Suppose the following grammar for the WHILE-language: - -\begin{center} -\begin{tabular}{lcl} -$Stmt$ & $\rightarrow$ & $\text{skip}$\\ - & $|$ & $Id := AExp$\\ - & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ - & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ -$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ - & $|$ & $Stmt$\medskip\\ -$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ - & $|$ & $Stmt$\medskip\\ -$AExp$ & $\rightarrow$ & $AExp + AExp$\\ - & $|$ & $AExp * AExp$\\ - & $|$ & $( AExp )$\\ - & $|$ & $Num$\\ - & $|$ & $Id$\medskip\\ -$BExp$ & $\rightarrow$ & $AExp = AExp$\\ - & $|$ & $AExp \not= AExp$\\ - & $|$ & $\text{false}$\\ - & $|$ & $\text{true}$\\ - -\end{tabular} -\end{center} - -Transform this grammar into Chomsky normalform. - -\item Write a program in the WHILE-language that calculates the factorial function. 
- -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea progs/LOOP.j --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/LOOP.j Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,90 @@ + +.class public LOOP.LOOP +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +ldc 750000 +istore 0 +iload 0 +istore 1 +iload 0 +istore 2 +iload 0 +istore 3 + +Loop_begin_48: + +ldc 0 +iload 1 +if_icmpge Loop_end_49 + +Loop_begin_50: + +ldc 0 +iload 2 +if_icmpge Loop_end_51 + +Loop_begin_52: + +ldc 0 +iload 3 +if_icmpge Loop_end_53 +iload 3 +ldc 1 +isub +istore 3 +goto Loop_begin_52 + +Loop_end_53: + +iload 0 +istore 3 +iload 2 +ldc 1 +isub +istore 2 +goto Loop_begin_50 + +Loop_end_51: + +iload 0 +istore 2 +iload 1 +ldc 1 +isub +istore 1 +goto Loop_begin_48 + +Loop_end_49: + +iload 1 +invokestatic LOOP/LOOP/write(I)V +iload 2 +invokestatic LOOP/LOOP/write(I)V +iload 3 +invokestatic LOOP/LOOP/write(I)V + + + return + +.end method diff -r e85600529ca5 -r 4794759139ea progs/S_grammar-token.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/S_grammar-token.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,47 @@ +//:load matcher.scala +//:load parser3.scala + +abstract class Token +case object T_ONE extends Token + +val lexing_rules : List[Rule[Token]] = + List(("1", (s: List[Char]) => T_ONE)) + +val T = Tokenizer(lexing_rules) + +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tokparser(t: Token) = TokParser(t) + +case object EmpParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = Set(("", ts)) +} + + +lazy val Su: Parser[List[Token], String] = + (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +def test(i: Int) = { + val result = Su.parse_all(T.fromString("1" * i)) + //print(result.size + " ") +} + + +for (i <- 1 to 1000 by 50) { + print(i + " ") + print("%.5f".format(time_needed(1, test(i)))) + print("\n") +} + diff -r e85600529ca5 -r 4794759139ea progs/S_grammar.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/S_grammar.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,52 @@ +//:load parser3.scala + +case class StringParser(s: String) extends Parser[String, String] { + def parse(ts: String) = { + if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length))) + else Set() + } +} + +implicit def string2parser(s: String) = StringParser(s) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// unambiguous grammar + +lazy val U: Parser[String, String] = + ("1" ~ U) ==> { case (x, y) => "1" + y} || "" + +def test1(i: Int) = { + val result = U.parse_all("1" * i) + //print(result.size + " ") +} + +for (i <- 1 to 1000 by 50) { + print(i + " ") + 
print("%.5f".format(time_needed(1, test1(i)))) + print("\n") +} + + + +// ambiguous grammar +// n = 16 -> over 35 million parse trees + +lazy val S: Parser[String, String] = + ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "" + +def test2(i: Int) = { + val result = S.parse_all("1" * i) + print(result.size + " ") +} + +for (i <- 1 to 30) { + print(i + " ") + print("%.5f".format(time_needed(1, test2(i)))) + print("\n") +} diff -r e85600529ca5 -r 4794759139ea progs/Term_grammar.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/Term_grammar.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,62 @@ +//:load matcher.scala +//:load parser3.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been lexed +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token + + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T +lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F +lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser + +println(E.parse_all(Tk.fromString("1 + 2 + 3"))) +println(E.parse_all(Tk.fromString("1 + 2 * 3"))) +println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) +println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)"))) + diff -r e85600529ca5 -r 4794759139ea progs/app0.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app0.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,6 @@ +import io.Source + +def get_page(url: String) : String = { + Source.fromURL(url).take(10000).mkString + + diff -r e85600529ca5 -r 4794759139ea progs/app1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,12 @@ +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + diff -r e85600529ca5 -r 4794759139ea progs/app2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app2.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,16 @@ +val http_pattern = """\"https?://[^\"]*\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +def crawl(url: String, n: Int) : Unit = { + 
if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + diff -r e85600529ca5 -r 4794759139ea progs/app3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app3.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,10 @@ +val my_urls = """urbanc""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} diff -r e85600529ca5 -r 4794759139ea progs/app4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app4.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,14 @@ +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = + """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} diff -r e85600529ca5 -r 4794759139ea progs/app5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app5.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,8 @@ +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} diff -r e85600529ca5 -r 4794759139ea progs/app51.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app51.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,8 @@ +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp diff -r e85600529ca5 -r 4794759139ea progs/app6.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app6.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,11 @@ +def deriv (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) + else SEQ(deriv(r1, c), r2) + case STAR(r) => SEQ(deriv(r, c), STAR(r)) +} + diff -r e85600529ca5 -r 4794759139ea progs/app7.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app7.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,16 @@ +abstract class Parser[I, T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) + yield head + + def || (right : => Parser[I, T]) : Parser[I, T] = + new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = + new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = + new SeqParser(this, right) +} + + diff -r e85600529ca5 -r 4794759139ea progs/app8.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app8.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,23 @@ +class SeqParser[I, T, S](p: => Parser[I, T], + q: => Parser[I, S]) + extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) + yield ((head1, head2), tail2) +} + +class AltParser[I, T](p: => Parser[I, T], + q: => Parser[I, T]) + extends 
Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I, T, S](p: => Parser[I, T], f: T => S) + extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) + yield (f(head), tail) +} + + diff -r e85600529ca5 -r 4794759139ea progs/app9.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/app9.while Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,15 @@ +/* Fibonacci Program + input: n + */ + +n := 19; +minus1 := 0; +minus2 := 1; +while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 +}; +write minus2 + diff -r e85600529ca5 -r 4794759139ea progs/automata.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,106 @@ + +// a class for deterministic finite automata, +// the type of states is kept polymorphic + +case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { + + // the transition function lifted to list of characters + def deltas(q: A, cs: List[Char]) : Either[A, String] = + if (states.contains(q)) cs match { + case Nil => Left(q) + case c::cs => + if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) + else Right(q + " does not have a transition for " + c) + } + else Right(q + " is not a state of the automaton") + + // wether a string is accepted by the automaton + def accepts(s: String) = deltas(start, s.toList) match { + case Left(q) => fins.contains(q) + case _ => false + } +} + + +// translating a regular expression into a finite +// automaton + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +implicit def string2rexp(s : String) = { + def chars2rexp (cs: List[Char]) : Rexp = cs match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::cs => SEQ(CHAR(c), chars2rexp(cs)) + } + chars2rexp(s.toList) +} + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def der (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) + else SEQ(der(r1, c), r2) + case STAR(r) => SEQ(der(r, c), STAR(r)) +} + + +// Here we construct an automaton whose +// states are regular expressions +type State = Rexp +type States = Set[State] +type Transition = Map[(State, Char), State] + +// we use as an alphabet all lowercase letters +val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet + +def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { + val q_der : State = der(q, c) + if (qs.contains(q_der)) (qs, delta + ((q, c) -> q)) + else explore(qs + q_der, delta + ((q, c) -> q_der), q_der) +} + +def explore (qs: States, delta: Transition, q: State) : (States, Transition) = + alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) + + +def mk_automaton (r: Rexp) : Automaton[Rexp] = { + val (qs, delta) = explore(Set(r), Map(), r); + val fins = for (q <- qs if nullable(q)) yield q; + Automaton[Rexp](r, qs, delta, fins) +} + +val A = mk_automaton(ALT("ab","ac")) + 
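// mk_automaton explores the derivatives of ALT("ab","ac"): every regular
// expression reachable via der becomes a state, and the nullable ones are
// the accepting states; the checks below probe a few strings against A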
+A.start +A.states.toList.length + +println(A.accepts("bd")) +println(A.accepts("ab")) +println(A.accepts("ac")) + +val r1 = STAR(ALT("a","b")) +val r2 = SEQ("b","b") +val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a") +val B = mk_automaton(r3) + +B.start +B.states.toList.length diff -r e85600529ca5 -r 4794759139ea progs/automata1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,95 @@ + +// a class for deterministic finite automata, +// the type of states is kept polymorphic + +case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { + + // the transition function lifted to list of characters + def deltas(q: A, cs: List[Char]) : A = + if (states.contains(q)) cs match { + case Nil => q + case c::cs => + if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) + else throw new RuntimeException(q + " does not have a transition for " + c) + } + else throw new RuntimeException(q + " is not a state of the automaton") + + // wether a string is accepted by the automaton + def accepts(s: String) = + try { + fins.contains(deltas(start, s.toList)) + } catch { + case e:RuntimeException => false + } +} + + + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +implicit def string2rexp(s : String) = { + def chars2rexp (cs: List[Char]) : Rexp = cs match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::cs => SEQ(CHAR(c), chars2rexp(cs)) + } + chars2rexp(s.toList) +} + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def der (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) + else SEQ(der(r1, c), r2) + case STAR(r) => SEQ(der(r, c), STAR(r)) +} + + +// Here we construct an automaton whose +// states are regular expressions +type State = Rexp +type States = Set[State] +type Transition = Map[(State, Char), State] + +def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { + val qc : State = der(q, c) + if (qs.contains(qc)) (qs, delta + ((q, c) -> q)) + else explore(qs + qc, delta + ((q, c) -> qc), qc) +} + +// we use as alphabet all lowercase letters +val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet + +def explore (qs: States, delta: Transition, q: State) : (States, Transition) = + alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) + + +def mk_automaton (r: Rexp) : Automaton[Rexp] = { + val (qs, delta) = explore(Set(r), Map(), r); + val fins = for (q <- qs if nullable(q)) yield q; + Automaton[Rexp](r, qs, delta, fins) +} + +val A = mk_automaton(ALT("ab","ac")) + +println(A.accepts("bd")) +println(A.accepts("ab")) +println(A.accepts("ac")) diff -r e85600529ca5 -r 4794759139ea progs/compile.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/compile.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,326 @@ +// A parser and evaluator for teh while language +// +import matcher._ +import parser._ + +// some regular expressions +val SYM = 
RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Relop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => 
Relop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// compiler +val beginning = """ +.class public XXX.XXX +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +""" + +val ending = """ + + return + +.end method +""" + +// for generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +type Env = Map[String, String] +type Instrs = List[String] + +def compile_aexp(a: AExp, env : Env) : Instrs = a match { + case Num(i) => List("ldc " + i.toString + "\n") + case Var(s) => List("iload " + env(s) + "\n") + case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") + case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") + case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") +} + +def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { + case True => Nil + case False => List("goto " + jmp + "\n") + case Relop("=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") + case Relop("!=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") + case Relop("<", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") +} + + +def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { + case Skip => (Nil, env) + case Assign(x, a) => { + val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString + (compile_aexp(a, env) ++ + List("istore " + index + "\n"), env + (x -> index)) + } + case If(b, bl1, bl2) => { + val if_else = Fresh("If_else") + val if_end = Fresh("If_end") + val (instrs1, env1) = compile_bl(bl1, env) + val (instrs2, env2) = compile_bl(bl2, env1) + (compile_bexp(b, env, if_else) ++ + instrs1 ++ + List("goto " + if_end + "\n") ++ + List("\n" + if_else + ":\n\n") ++ + instrs2 ++ + List("\n" + if_end + ":\n\n"), env2) + } + case While(b, bl) => { + val loop_begin = Fresh("Loop_begin") + val loop_end = Fresh("Loop_end") + val (instrs1, env1) = compile_bl(bl, env) + (List("\n" + loop_begin + ":\n\n") ++ + compile_bexp(b, env, loop_end) ++ + instrs1 ++ + List("goto " + loop_begin + "\n") ++ + List("\n" + loop_end + ":\n\n"), env1) + } + case Write(x) => + 
(List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) +} + +def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { + case Nil => (Nil, env) + case s::bl => { + val (instrs1, env1) = compile_stmt(s, env) + val (instrs2, env2) = compile_bl(bl, env1) + (instrs1 ++ instrs2, env2) + } +} + +def compile(input: String) : String = { + val class_name = input.split('.')(0) + val tks = Tok.fromFile(input) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) +} + + +def compile_to(input: String, output: String) = { + val fw = new java.io.FileWriter(output) + fw.write(compile(input)) + fw.close() +} + +// +val tks = Tok.fromString("x := x + 1") +val ast = Stmt.parse_single(tks) +println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) + + + +//examples + +compile_to("loops.while", "loops.j") +//compile_to("fib.while", "fib.j") + + +// testing cases for time measurements + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// for testing +import scala.sys.process._ + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + + +def compile_test(n: Int) : Unit = { + val class_name = "LOOP" + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) + val fw = new java.io.FileWriter(class_name + ".j") + fw.write(assembly) + fw.close() + val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! 
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) +} + +List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) + + + +// Javabyte code assmbler +// +// java -jar jvm/jasmin-2.4/jasmin.jar loops.j + + + + + + diff -r e85600529ca5 -r 4794759139ea progs/crawler.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/crawler.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,144 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// non-existing page -> returns the empty string +get_page("""http://www.foobar.com""") + + +// staring URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// starts with an " +// then either http or https +// then :// +// then any character that is not " +// finally " +val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r +val http_pattern = """\"(https?://[^\"]*)\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// get all urls in startURL +get_all_URLs(get_page(startURL)) + +// number of all urls in startURL +get_all_URLs(get_page(startURL)).toList.length + + +// naive version - seraches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +crawl(startURL, 2) + + +//breadth-first version without visiting +//pages twice +def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else { + println("Visiting: " + n + " " + url) + get_all_URLs(get_page(url)) + } + } + } + bf_crawl(new_todo, visited union todo, n - 1) + } +} + +bf_crawl(Set(startURL1), Set(), 2) + + +//breadth-first version without visiting +//pages twice and only in "my" domain +val my_pattern = """urbanc""".r + +// breadth first search avoiding double searches +def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else if (my_pattern.findFirstIn(url) == None) Set[String]() + else { + println("Visiting: " + n + " " + url); + get_all_URLs(get_page(url)) + } + } + } + bf_crawl2(new_todo, visited union todo, n - 1) + } +} + +bf_crawl2(Set(startURL1), Set(), 5) + +// email harvester +// from +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ + +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else { + println("Visiting: " + n + " " + url); + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + get_all_URLs(get_page(url)) + } + } + } + bf_crawl3(new_todo, visited union todo, n - 1) + } +} + +bf_crawl3(Set(startURL1), Set(), 3) + + +// depth-first version does not work, +// because it might visit pages at depth 1 +// while it still wants to visit them at +// 
depth 2 +var visited = Set("") + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (visited.contains(url)) () //println("Already visited: " + n + " " + url) + else { + println("Visiting: " + n + " " + url); + visited += url + for (u <- getAllURLs(getURLpage(url))) crawl(u, n - 1); + } +} diff -r e85600529ca5 -r 4794759139ea progs/crawler1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/crawler1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,45 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - seraches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +// staring URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" +//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/""" + + +// call on the command line +crawl(startURL, 2) + +crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2) diff -r e85600529ca5 -r 4794759139ea progs/crawler2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/crawler2.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,44 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// staring URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - seraches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +// can now deal with depth 3 +// start on command line +crawl(startURL, 4) + diff -r e85600529ca5 -r 4794759139ea progs/crawler3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/crawler3.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,49 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// staring URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ + +def unquote(s: String) = 
s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - seraches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + //else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} + +// can now deal with depth 3 +// start on command line +crawl(startURL, 3) + diff -r e85600529ca5 -r 4794759139ea progs/fib.j --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fib.j Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,60 @@ + +.class public fib.fib +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +ldc 19 +istore 0 +ldc 0 +istore 1 +ldc 1 +istore 2 + +Loop_begin_0: + +ldc 0 +iload 0 +if_icmpge Loop_end_1 +iload 2 +istore 3 +iload 1 +iload 2 +iadd +istore 2 +iload 3 +istore 1 +iload 0 +ldc 1 +isub +istore 0 +goto Loop_begin_0 + +Loop_end_1: + +iload 2 +invokestatic fib/fib/write(I)V + + + return + +.end method diff -r e85600529ca5 -r 4794759139ea progs/fib.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fib.while Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,15 @@ +/* Fibonacci Program + input: n +*/ + +n := 19; +minus1 := 0; +minus2 := 1; +while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 +}; +write minus2 + diff -r e85600529ca5 -r 4794759139ea progs/html.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/html.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,99 @@ + +//:load matcher.scala + +// some regular expressions +val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""") +val WORD = PLUS(SYM) + +val BTAG = SEQS("<", WORD, ">") +val ETAG = SEQS("") + +val WHITESPACE = PLUS(RANGE(" \n")) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case class T_WORD(s: String) extends Token +case class T_ETAG(s: String) extends Token +case class T_BTAG(s: String) extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +val lexing_rules: List[Rule[Token]] = + List((BTAG, (s) => T_BTAG(s.mkString)), + (ETAG, (s) => T_ETAG(s.mkString)), + (WORD, (s) => T_WORD(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE)) + +// the tokenizer +val T = Tokenizer(lexing_rules) + +// width for printing +val WIDTH = 60 + + +def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match { + case Nil => println(Console.RESET) + case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr) + case T_WORD(s)::rest => { + val newstr = Console.RESET + ctr.reverse.mkString + s + if (c + s.length < WIDTH) { + print(newstr); + interpret(rest, c + s.length, ctr) + } + else { + print("\n" + newstr) + interpret(rest, s.length, ctr) + } + } + case T_BTAG("
<p>")::rest => print("\n"); interpret(rest, 0, ctr) + case T_ETAG("
</p>")::rest => print("\n"); interpret(rest, 0, ctr) + case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr) + case T_BTAG("<u>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) + case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr) + case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr) + case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr) + case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) + case _::rest => interpret(rest, c, ctr) +} + +val test_string = """ +MSc Projects + +

+start of paragraph. a cyan word normal again something longer. +

+ + +

Description: + Regular expressions are extremely useful for many text-processing tasks such as finding patterns in texts, + lexing programs, syntax highlighting and so on. Given that regular expressions were + introduced in 1950 by Stephen Kleene, you might think + regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still + an active research area. For example + this paper + about regular expression matching and partial derivatives was presented this summer at the international + PPDP'12 conference. The task in this project is to implement the results from this paper.

+ +

The background for this project is that some regular expressions are + evil + and can stab you in the back; according to + this blog post. + For example, if you use in Python or + in Ruby (probably also in other mainstream programming languages) the + innocently looking regular expression a?{28}a{28} and match it, say, against the string + aaaaaaaaaaaaaaaaaaaaaaaaaaaa (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact, + Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself: + re.py (Python version) and + re.rb + (Ruby version). You can imagine an attacker + mounting a nice DoS attack against + your program if it contains such an evil regular expression. Actually + Scala (and also Java) are almost immune from such + attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale + the regular expression and string further to, say, 4,600 as, then you get a + StackOverflowError + potentially crashing your program. +

+""" + +interpret(T.fromString(test_string), 0, Nil) diff -r e85600529ca5 -r 4794759139ea progs/html1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/html1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,75 @@ + + +//:load matcher.scala + + +// some regular expressions +val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""") +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER))) +val BTAG = SEQS("<", NAME, ">") +val ETAG = SEQS("") + +val WORD = PLUS(ALT(LETTER, DIGIT)) +val WHITESPACE = PLUS(RANGE(" \n")) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case class T_WORD(s: String) extends Token +case class T_ETAG(s: String) extends Token +case class T_BTAG(s: String) extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = + tokenize(rs, s.toList) + + + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((BTAG, (s) => T_BTAG(s.mkString)), + (ETAG, (s) => T_ETAG(s.mkString)), + (WORD, (s) => T_WORD(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE)) + +val ts = tokenize_file(lexing_rules, "test.html") + + +val WIDTH = 60 + +def is_tag(t: Token) = t match { + case T_BTAG(_) => true + case T_ETAG(_) => true + case _ => false +} + +def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match { + case Nil => println(Console.RESET) + case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr) + case T_WORD(s)::rest => { + val newc = c + s.length + val newstr = Console.RESET + ctr.reverse.mkString + s + if (newc < WIDTH) { + print(newstr); + interpret(rest, newc, ctr) + } + else { + print("\n" + newstr) + interpret(rest, s.length, ctr) + } + } + case T_BTAG("
<p>")::rest => print("\n"); interpret(rest, 0, ctr) + case T_ETAG("
</p>
")::rest => print("\n"); interpret(rest, 0, ctr) + case T_BTAG("")::rest => interpret(rest, c, Console.BOLD :: ctr) + case T_BTAG("")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) + case T_BTAG("")::rest => interpret(rest, c, Console.CYAN :: ctr) + case T_BTAG("")::rest => interpret(rest, c, Console.RED :: ctr) + case T_BTAG("")::rest => interpret(rest, c, Console.BLINK :: ctr) + case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) + case _::rest => interpret(rest, c, ctr) +} + +interpret(ts, 0, Nil) diff -r e85600529ca5 -r 4794759139ea progs/i.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/i.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,492 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLC extends Rexp // recognises any character +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp // negation of a regular expression + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. 
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLC => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// regular expression for specifying +// ranges of characters +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// many alternatives +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +// repetitions +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +type Rule[T] = (Rexp, List[Char] => T) + +case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { + + def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + + def one_token(s: List[Char]) : Either[(List[Char], T), String] = { + val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten + if (somes == Nil) Right(s.mkString) + else Left(somes sortBy (_._1.length) head) + } + + def tokenize(cs: List[Char]) : List[T] = cs match { + case Nil => Nil + case _ => one_token(cs) match { + case Left((rest, token)) => token :: tokenize(rest) + case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } + } + } + + def fromString(s: String) : List[T] = + tokenize(s.toList).filterNot(excl.contains(_)) + + def fromFile(name: String) : List[T] = + fromString(io.Source.fromFile(name).mkString) + +} + + +// parser combinators with input type I and return type T + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head + + def parse_single(ts: I) : T = parse_all(ts).toList match { + case t::Nil => t + case _ => { println ("Parse Error") ; sys.exit(-1) } + } + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, 
T]) extends Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +// A parser and evaluator for teh while language +// +//:load matcher.scala +//:load parser3.scala + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[Rule[Token]] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Relop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) 
|| + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// compiler +val beginning = """ +.class public XXX.XXX +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +""" + +val ending = """ + + return + +.end method +""" + +// for generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +type Env = Map[String, String] +type Instrs = List[String] + +def compile_aexp(a: AExp, env : Env) : Instrs = a match { + case Num(i) => List("ldc " + i.toString + "\n") + case Var(s) => List("iload " + env(s) + "\n") + case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") + case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") + case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") +} + +def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { + case True => Nil + case False => List("goto " + jmp + "\n") + case Relop("=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") + case Relop("!=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") + case Relop("<", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") +} + + +def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { + case Skip => (Nil, env) + case Assign(x, a) => { + val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString + (compile_aexp(a, env) ++ + List("istore " + index + "\n"), env + (x -> index)) + } + case If(b, bl1, bl2) => { + val if_else = Fresh("If_else") + val if_end = Fresh("If_end") + val (instrs1, env1) = compile_bl(bl1, env) + val (instrs2, env2) = compile_bl(bl2, env1) + (compile_bexp(b, env, if_else) ++ + instrs1 ++ + List("goto " + if_end + "\n") ++ + List("\n" + if_else + ":\n\n") ++ + instrs2 ++ + 
List("\n" + if_end + ":\n\n"), env2) + } + case While(b, bl) => { + val loop_begin = Fresh("Loop_begin") + val loop_end = Fresh("Loop_end") + val (instrs1, env1) = compile_bl(bl, env) + (List("\n" + loop_begin + ":\n\n") ++ + compile_bexp(b, env, loop_end) ++ + instrs1 ++ + List("goto " + loop_begin + "\n") ++ + List("\n" + loop_end + ":\n\n"), env1) + } + case Write(x) => + (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) +} + +def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { + case Nil => (Nil, env) + case s::bl => { + val (instrs1, env1) = compile_stmt(s, env) + val (instrs2, env2) = compile_bl(bl, env1) + (instrs1 ++ instrs2, env2) + } +} + +def compile(input: String) : String = { + val class_name = input.split('.')(0) + val tks = Tok.fromFile(input) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) +} + + +def compile_to(input: String, output: String) = { + val fw = new java.io.FileWriter(output) + fw.write(compile(input)) + fw.close() +} + +// +val tks = Tok.fromString("x := x + 1") +val ast = Stmt.parse_single(tks) +println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) + + + +//examples + +compile_to("loops.while", "loops.j") +//compile_to("fib.while", "fib.j") + + +// testing cases for time measurements +/* +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// for testing +import scala.sys.process._ + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + + +def compile_test(n: Int) : Unit = { + val class_name = "LOOP" + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) + val fw = new java.io.FileWriter(class_name + ".j") + fw.write(assembly) + fw.close() + val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! 
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) +} + +List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) + + + +// javabyte code assmbler +// +// java -jar jvm/jasmin-2.4/jasmin.jar loops.j + +*/ + + + + + diff -r e85600529ca5 -r 4794759139ea progs/loops.j --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/loops.j Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,90 @@ + +.class public loops.loops +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +ldc 1000 +istore 0 +iload 0 +istore 1 +iload 0 +istore 2 +iload 0 +istore 3 + +Loop_begin_0: + +ldc 0 +iload 1 +if_icmpge Loop_end_1 + +Loop_begin_2: + +ldc 0 +iload 2 +if_icmpge Loop_end_3 + +Loop_begin_4: + +ldc 0 +iload 3 +if_icmpge Loop_end_5 +iload 3 +ldc 1 +isub +istore 3 +goto Loop_begin_4 + +Loop_end_5: + +iload 0 +istore 3 +iload 2 +ldc 1 +isub +istore 2 +goto Loop_begin_2 + +Loop_end_3: + +iload 0 +istore 2 +iload 1 +ldc 1 +isub +istore 1 +goto Loop_begin_0 + +Loop_end_1: + +iload 1 +invokestatic loops/loops/write(I)V +iload 2 +invokestatic loops/loops/write(I)V +iload 3 +invokestatic loops/loops/write(I)V + + + return + +.end method diff -r e85600529ca5 -r 4794759139ea progs/loops.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/loops.while Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,18 @@ +start := 1000; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z diff -r e85600529ca5 -r 4794759139ea progs/matcher.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/matcher.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,130 @@ +package object matcher { + +// regular expressions +// including constructors for NOT and ALLC +sealed abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLC extends Rexp // recognises any character +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp // negation of a regular expression + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. 
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLC => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// main class for the tokenizer +case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) { + +def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + +def one_token(s: List[Char]) : Either[(List[Char], T), String] = { + val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten + if (somes == Nil) Right(s.mkString) + else Left(somes sortBy (_._1.length) head) +} + +def tokenize(cs: List[Char]) : List[T] = cs match { + case Nil => Nil + case _ => one_token(cs) match { + case Left((rest, token)) => token :: tokenize(rest) + case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } + } +} + +def fromString(s: String) : List[T] = + tokenize(s.toList).filterNot(excl.contains(_)) + +def fromFile(name: String) : List[T] = + fromString(io.Source.fromFile(name).mkString) + +} + + +// regular expression for specifying +// ranges of characters +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// many alternatives +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +// repetitions +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + +} diff -r e85600529ca5 -r 4794759139ea progs/mllex.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/mllex.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,109 @@ +:load matcher.scala + + +// some regular expressions +val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", + "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", + "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", + "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", + "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", + "sharing", "sig", "signature", "struct", "structure", "then", "type", + "val", "where", "while", "with", "withtype")) + +val DIGITS = RANGE("0123456789") +val NONZERODIGITS = RANGE("123456789") + +val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES) + +val ALL = ALTS(KEYWORDS, INTEGERS) + +val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/") + + + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') 
+val WHITESPACE = PLUS(RANGE(" \n".toList)) +val OPS = RANGE("+-*".toList) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = + tokenize(rs, s.toList).filterNot(_ match { + case T_WHITESPACE => true + case _ => false + }) + + + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +tokenize_file(Nil, "nominal_library.ML") + + + + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("E" -> List(T_NUM), + "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)), + "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN)) + +def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match { + case (_, Nil) => true + case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) + case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) + case _ => false +} + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match { + case List(T_NT("E", tree)) => { println(tree); true } + case _ => { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) + tss.flatten.exists(parse1(g, _)) + } +} + + +println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) + + + diff -r e85600529ca5 -r 4794759139ea progs/parser1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,88 @@ +// A naive bottom-up parser with backtracking +// +// Needs: +// :load matcher.scala + +// some regular expressions +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised + +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class NT(s: String) extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => 
T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("F" -> List(T_NUM), + "E" -> List(T_NUM), + "E" -> List(NT("E"), T_OP("+"), NT("E")), + "E" -> List(NT("E"), T_OP("-"), NT("E")), + "E" -> List(NT("E"), T_OP("*"), NT("E")), + "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) + + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples for chop +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse(g: Grammar, ts: List[Token]) : Boolean = { + println(ts) + if (ts == List(NT("E"))) true + else { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) + tss.flatten.exists(parse(g, _)) + } +} + +def parser(g: Grammar, s: String) = { + println("\n") + parse(g, Tok.fromString(s)) +} + + + +parser(grammar, "2 + 3 * 4 + 1") +parser(grammar, "(2 + 3) * (4 + 1)") +parser(grammar, "(2 + 3) * 4 (4 + 1)") + + + diff -r e85600529ca5 -r 4794759139ea progs/parser2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser2.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,139 @@ +// A naive version of parser combinators producing parse trees +// +// Needs +// :load matcher.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_IF extends Token +case object T_THEN extends Token +case object T_ELSE extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List(("if", (s) => T_IF), + ("then", (s) => T_THEN), + ("else", (s) => T_ELSE), + (NUMBER, (s) => T_NUM(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +// parse trees +abstract class ParseTree +case class Leaf(t: Token) extends ParseTree +case class Branch(pts: List[ParseTree]) extends ParseTree + +def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match { + case Leaf(t) => Branch(List(Leaf(t), pt2)) + case Branch(pts) => Branch(pts ++ List(pt2)) +} + +// parser combinators +abstract class Parser { + def parse(ts: List[Token]): Set[(ParseTree, List[Token])] + + def parse_all(ts: List[Token]) : Set[ParseTree] = + for ((head, tail) <- parse(ts); if (tail == Nil)) yield head + + def || (right : => Parser) : Parser = new AltParser(this, right) + def ~ (right : => Parser) : Parser = new 
SeqParser(this, right) +} + +class AltParser(p: => Parser, q: => Parser) extends Parser { + def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) +} + +class SeqParser(p: => Parser, q: => Parser) extends Parser { + def parse(ts: List[Token]) = + for ((head1, tail1) <- p.parse(ts); + (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) +} + +class ListParser(ps: => List[Parser]) extends Parser { + def parse(ts: List[Token]) = ps match { + case Nil => Set() + case p::Nil => p.parse(ts) + case p::ps => + for ((head1, tail1) <- p.parse(ts); + (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) + } +} + +case class TokParser(tok: Token) extends Parser { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((Leaf(t), ts)) + case _ => Set () + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +case object IdParser extends Parser { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) + case _ => Set () + } +} + +case object NumParser extends Parser { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) + case _ => Set () + } +} + +lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol +lazy val T: Parser = (F ~ T_OP("*") ~ T) || F +lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser + +println(Tok.fromString("1 + 2 + 3")) +println(E.parse_all(Tok.fromString("1 + 2 + 3"))) + +def eval(t: ParseTree) : Int = t match { + case Leaf(T_NUM(n)) => n.toInt + case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2) + case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2) + case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) +} + +(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_)) +(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_)) + +lazy val EXPR: Parser = + new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || + new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || + IdParser + +println(EXPR.parse_all(Tok.fromString("if a then b else c"))) +println(EXPR.parse_all(Tok.fromString("if a then if x then y else c"))) + + + + diff -r e85600529ca5 -r 4794759139ea progs/parser2a.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser2a.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,105 @@ +// Parser combinators including semantic actions +// parses lists of tokens +// +// Needs +// :load matcher.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_IF extends Token +case object T_THEN extends Token +case object T_ELSE extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List(("if", (s) => T_IF), + ("then", (s) => T_THEN), + ("else", (s) => T_ELSE), + (NUMBER, (s) => T_NUM(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => 
T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// parser combinators with return type T +abstract class Parser[T] { + def parse(ts: List[Token]): Set[(T, List[Token])] + + def parse_all(ts: List[Token]) : Set[T] = + for ((head, tail) <- parse(ts); if (tail == Nil)) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) + def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) + +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: List[Token]) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse (sb: List[Token]) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +case class TokParser(tok: Token) extends Parser[Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T +lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F +lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser + +println(E.parse_all(Tok.fromString("1 + 2 + 3"))) +println(E.parse_all(Tok.fromString("1 + 2 * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) + +// Excercise: implement minus +println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r e85600529ca5 -r 4794759139ea progs/parser3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser3.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,41 @@ +package object parser { + +// parser combinators +// with input type I and return type T +// +// needs to be compiled with scalac parser3.scala + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head + + def parse_single(ts: I) : T = parse_all(ts).toList match { + case t::Nil => t + case _ => { println ("Parse Error") ; sys.exit(-1) } + } + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: I) = 
p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + +} diff -r e85600529ca5 -r 4794759139ea progs/parser4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser4.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,82 @@ + +// parser combinators with input type I and return type T + +case class SubString(s: String, l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def substring(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubString(s, low, high) + +} + +type Ctxt = List[(String, SubString)] + +abstract class Parser[T] { + + def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] + + def parse_all(s: String) : Set[T] = + for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case class SubStringParser(s: String) extends Parser[SubString] { + val n = s.length + def parse(sb: SubString, ctxt: Ctxt) = { + if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) + Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) + else Set() + } +} + +implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) + +class IgnLst[T](p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +// ambigous grammar +lazy val E: Parser[Int] = + new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || + ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || + "0" ==> { (s) => 0 } || + "1" ==> { (s) => 1 } || + "2" ==> { (s) => 2 } || + "3" ==> { (s) => 3 }) + +println(E.parse_all("1+2*3+3")) + + diff -r e85600529ca5 -r 4794759139ea progs/parser5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser5.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,113 @@ +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE 
extends Token +case class T_NUM(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token + +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +// parser combinators with input type I and return type T +// and memoisation + +case class SubList[T](s: List[T], l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def sublist(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubList(s, low, high) +} + +type Ctxt[T] = List[(String, SubList[T])] + +abstract class Parser[I, T] { + + def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] + + def parse_all(s: List[I]) : Set[T] = + for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case object NumParser extends Parser[Token, Int] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { + case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) + case _ => Set() + } + else Set() + } +} + +case class TokParser(t: Token) extends Parser[Token, Token] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) + else Set() + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +lazy val E: Parser[Token, Int] = + new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || + (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || + NumParser) + +println(E.parse_all(Tk.fromString("1 + 2 * 3"))) +println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) 
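The CHECK and IgnLst combinators above are what make the left-recursive rule for E terminate: CHECK records which (non-terminal, input window) pairs are currently being tried, and on re-entry it retries the parser through IgnLst, which temporarily shortens the window by one element. Below is a minimal, self-contained sketch of just that windowing idea; the object and method names are new and for illustration only, they are not part of the programs in this patch.

// Sketch: a window [low, high) over the input that can be shrunk from the right,
// which is what IgnLst does before re-trying a left-recursive non-terminal.
object WindowSketch extends App {
  final case class Window(s: String, low: Int, high: Int) {
    def length: Int = high - low
    def view: String = s.slice(low, high)
    // drop the last element of the window; IgnLst parses on this smaller window
    // and afterwards restores high on the results it returns
    def dropRight1: Window = copy(high = high - 1)
  }

  val w = Window("1+2*3", 0, 5)
  println(w.view)              // prints: 1+2*3
  println(w.dropRight1.view)   // prints: 1+2*
  println(w.dropRight1.length) // prints: 4, strictly smaller than before
}

Since every such retry works on a strictly smaller window, and both CHECK and IgnLst give up on an empty window, the recursion for a rule like E ::= E + E bottoms out instead of looping.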
diff -r e85600529ca5 -r 4794759139ea progs/re-alt.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re-alt.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,115 @@ +trait RegExp { + def nullable: Boolean + def derive(c: Char): RegExp +} + +case object Empty extends RegExp { + def nullable = false + def derive(c: Char) = Empty +} + +case object Eps extends RegExp { + def nullable = true + def derive(c: Char) = Empty +} + +case class Str(s: String) extends RegExp { + def nullable = s.isEmpty + def derive(c: Char) = + if (s.isEmpty || s.head != c) Empty + else Str(s.tail) +} + +case class Cat(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = + if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) + else Cat(r.derive(c), s) +} + +case class Star(r: RegExp) extends RegExp { + def nullable = true + def derive(c: Char) = Cat(r.derive(c), this) +} + +case class Or(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable || s.nullable + def derive(c: Char) = Or(r.derive(c), s.derive(c)) +} + +case class And(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = And(r.derive(c), s.derive(c)) +} + +case class Not(r: RegExp) extends RegExp { + def nullable = !r.nullable + def derive(c: Char) = Not(r.derive(c)) +} + + + + +object Matcher { + def matches(r: RegExp, s: String): Boolean = { + if (s.isEmpty) r.nullable + else matches(r.derive(s.head), s.tail) + } +} + + +object Pimps { + implicit def string2RegExp(s: String) = Str(s) + + implicit def regExpOps(r: RegExp) = new { + def | (s: RegExp) = Or(r, s) + def & (s: RegExp) = And(r, s) + def % = Star(r) + def %(n: Int) = rep(r, n) + def ? = Or(Eps, r) + def ! = Not(r) + def ++ (s: RegExp) = Cat(r, s) + def ~ (s: String) = Matcher.matches(r, s) + } + + implicit def stringOps(s: String) = new { + def | (r: RegExp) = Or(s, r) + def | (r: String) = Or(s, r) + def & (r: RegExp) = And(s, r) + def & (r: String) = And(s, r) + def % = Star(s) + def % (n: Int) = rep(Str(s), n) + def ? = Or(Eps, s) + def ! = Not(s) + def ++ (r: RegExp) = Cat(s, r) + def ++ (r: String) = Cat(s, r) + def ~ (t: String) = Matcher.matches(s, t) + } + + def rep(r: RegExp, n: Int): RegExp = + if (n <= 0) Star(r) + else Cat(r, rep(r, n - 1)) +} + + +object Test { + import Pimps._ + + val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + val int = ("+" | "-").? ++ digit.%(1) + val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? 
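+  // Added note (illustration only, not part of the original test assertions below):
+  // digit.%(1) is rep(digit, 1), i.e. digit ++ digit.%, so `int` demands an optional
+  // sign followed by at least one digit, while `real` additionally allows an optional
+  // fractional part and an optional exponent. Hence a string like "+9.21E-12" from
+  // `reals` is accepted by `real` but rejected by `int`.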
+ + def main(args: Array[String]) { + val ints = List("0", "-4534", "+049", "99") + val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") + val errs = List("", "-", "+", "+-1", "-+2", "2-") + + ints.foreach(s => assert(int ~ s)) + reals.foreach(s => assert(!(int ~ s))) + errs.foreach(s => assert(!(int ~ s))) + + ints.foreach(s => assert(real ~ s)) + reals.foreach(s => assert(real ~ s)) + errs.foreach(s => assert(!(real ~ s))) + } +} diff -r e85600529ca5 -r 4794759139ea progs/re-internal.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re-internal.rb Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,22 @@ +# provided by Daniel Baldwin + +nums = (1..100) + +#iterate through the nums 1-100 +nums.each do |i| + + start_time = Time.now + string = "a" * i + + #create a new regular expression based on current value of i + re = Regexp.new(/((a?){#{i}})(a{#{i}})/) + + re.match(string) + #if re.match(string) + # puts "matched string a * #{i} with regex #{re}" + #else + # puts "unmatched string a * #{i} with regex #{re}" + #end + + puts "#{i} %.5f" % (Time.now - start_time) +end diff -r e85600529ca5 -r 4794759139ea progs/re-internal.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re-internal.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,17 @@ + +// measures the time a function needs +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 10001 by 300) { + val re = ("((a?){" + i + "})(a{" + i + "})") + println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) +} + + + diff -r e85600529ca5 -r 4794759139ea progs/re.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re.py Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,12 @@ +#!/usr/bin/env python +import re +import sys + +cn = sys.argv[1] + +r1 = '((a?){%s})' % cn +r2 = 'a{%s}' % cn + +m = re.match(r1 + r2 , "a" * int(cn)) + +print m.group(0) diff -r e85600529ca5 -r 4794759139ea progs/re.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular 
expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea progs/re0.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re0.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,117 @@ +import scala.annotation.tailrec + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLCHAR extends Rexp +case class CHAR(c: Char) extends Rexp +case class STR(s: String) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp +case class REP(r: Rexp, n: Int) extends Rexp + +// some convenience for typing in regular expressions +implicit def string2rexp(s : String) : Rexp = STR(s) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def %(n: Int) = REP(r, n) + def %%(n: Int) = SEQ(REP(r, n), STAR(r)) + def ? = ALT(EMPTY, r) + def unary_! 
= NOT(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def %(n: Int) = REP(s, n) + def %%(n: Int) = SEQ(REP(s, n), STAR(s)) + def ? = ALT(EMPTY, s) + def unary_! = NOT(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +// nullable function: tests whether the regular +// expression can recognise the empty string + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLCHAR => false + case CHAR(_) => false + case STR(s) => s.isEmpty + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) + case REP(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLCHAR => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) + case REP(r, i) => + if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) +} + +// derivative w.r.t. a string (iterates der) +@tailrec +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +//examples +val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +val int = ("+" | "-").? ~ digit.%%(1) +val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? + +val ints = List("0", "-4534", "+049", "99") +val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") +val errs = List("", "-", "+", "+-1", "-+2", "2-") + +ints.map(s => matcher(int, s)) +reals.map(s => matcher(int, s)) +errs.map(s => matcher(int, s)) + +ints.map(s => matcher(real, s)) +reals.map(s => matcher(real, s)) +errs.map(s => matcher(real, s)) + + + +def RTEST(n: Int) = ("a".? 
%(n)) ~ ("a" %(n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 12000 by 500) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r e85600529ca5 -r 4794759139ea progs/re1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,80 @@ + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +//example +//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) +//der('b', r) +//der('b', r) + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => EMPTY + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 29) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r e85600529ca5 -r 4794759139ea progs/re2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re2.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,100 @@ + +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp { + 
override def simp = r.simp match { + case NULL => EMPTY + case EMPTY => EMPTY + case r => STAR(r) + } +} + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => EMPTY + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 100) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r e85600529ca5 -r 4794759139ea progs/re3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re3.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,106 @@ + +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp { + override def simp = r.simp match { + case NULL => EMPTY + case EMPTY => EMPTY + case r => STAR(r) + } +} +case class NTIMES(r: Rexp, n: Int) extends Rexp { + override def simp = if (n == 0) EMPTY else + r.simp match { + case NULL => NULL + case EMPTY => EMPTY + case r => NTIMES(r, n) + } +} + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => 
false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r, i) => + if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 11001 by 500) { + println(i + " " + + " " + time_needed(1, matcher(RTEST(i), "a" * i))) +} + + diff -r e85600529ca5 -r 4794759139ea progs/re4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/re4.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,101 @@ +import scala.annotation.tailrec +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) false else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r, i) => + if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +// derivative w.r.t. 
a string (iterates der) +@tailrec +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +/*def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => NULL + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +}*/ + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 13001 by 500) { + println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i))) +} + + diff -r e85600529ca5 -r 4794759139ea progs/regexp.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/regexp.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,106 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// for example +println(STAR("abc")) + +// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) + + + +// a simple-minded regular expression matcher: +// it loops for examples like STAR(EMPTY) with +// strings this regular expression does not match + +def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { + case (NULL::rs, s) => false + case (EMPTY::rs, s) => smatchers(rs, s) + case (CHAR(c)::rs, Nil) => false + case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) + case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) + case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) + case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) + case (Nil, s) => s == Nil +} + +def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) + +// regular expression: a +println(smatcher(CHAR('a'), "ab")) + +// regular expression: a + (b o c) +println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) + +// regular expression: a + (b o c) +println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) + +// loops for regular expression epsilon* +//println(smatcher(STAR(EMPTY), "a")) + + + +// Regular expression matcher that works properly +//================================================ + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. 
a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +//examples + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + + diff -r e85600529ca5 -r 4794759139ea progs/regexp2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/regexp2.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. 
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea progs/regexp3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/regexp3.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,141 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL 
=> false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case class T_IDENT(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KEYWORD(s: String) extends Token + + +// an example list of syntactic rules +type Rule = (Rexp, List[Char] => Token) + +val rules: List[Rule]= + List(("if", (s) => T_KEYWORD(s.mkString)), + ("then", (s) => T_KEYWORD(s.mkString)), + ("else", (s) => T_KEYWORD(s.mkString)), + ("+", (s) => T_OP(s.mkString)), + (IDENT, (s) => T_IDENT(s.mkString)), + (NUMBER, (s) => T_NUM(s.mkString.toInt)), + (WHITESPACES, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + + +def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s) + +def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + +def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { + val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match { + case Nil => Nil + case _ => one_token(rs, s) match { + case (rest, token) => token :: tokenize(rs, rest) + } +} + +//examples +println(tokenize(rules, "if true then then 42 
else +".toList)) +println(tokenize(rules, "if+true+then+then+42+else +".toList)) +println(tokenize(rules, "ifff if 34 34".toList)) +println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(rules, "/* if true then */ then 42 else +".toList)) +//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea progs/regexp4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/regexp4.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,168 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// recognises nothing +def zeroable (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => zeroable(r1) && zeroable(r2) + case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) + case STAR(_) => false + case NOT(r) => !(zeroable(r)) +} + +def starts_with (r: Rexp, c: Char) : Boolean = r match { + case NULL => false + case EMPTY => false + case CHAR(d) => (c == d) + case ALT(r1, r2) => starts_with(r1, c) || starts_with(r2, c) + case SEQ(r1, r2) => if (nullable(r1)) (starts_with(r1, c) || starts_with(r2, c)) + else starts_with(r1, c) + case STAR(r) => starts_with(r, c) + case NOT(r) => !(starts_with(r, c)) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// derivative w.r.t. 
a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGITS = RANGE("0123456789".toList) +val NONZERODIGITS = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) +val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val WHITESPACE = RANGE(" \n".toList) +val SYMBOLS = RANGE("/*".toList) + +val ALL = ALT(ALT(ALT(LETTER, DIGITS), WHITESPACE), SYMBOLS) + +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + +println(matcher(NUMBER, "0")) +println(matcher(NUMBER, "01")) +println(matcher(NUMBER, "123450")) + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + +println(matcher(COMMENT, "/* */")) +println(matcher(COMMENT, "/* foobar comment */")) +println(matcher(COMMENT, "/* test */ test */")) + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, COMMENT, WHITESPACE) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + if (zeroable(r)) None else s match { + case Nil => if (nullable(r)) Some(Nil, t) else None + case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, t) + //case c::s if (zeroable(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) +} + + +def lex_one (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def lex_all (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => lex_one(regs, s) match { + case (rest, s) => s.mkString :: lex_all(regs, rest) + } +} + + + +starts_with(der('/', COMMENT), '*') + +munch(COMMENT, "/*ifff if 34 */".toList, Nil) +val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL))) + +der('a', COMMENT2) +zeroable(der('a', COMMENT2)) + +matcher(COMMENT2, "ifff if 34") +munch(COMMENT2, "ifff if 34".toList, Nil) +starts_with(COMMENT2, 'i') +lex_all(regs, "ifff if 34".toList) +lex_all(regs, "ifff $ if 34".toList) + diff -r e85600529ca5 -r 4794759139ea progs/regexp5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/regexp5.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,177 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// 
some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// recognises nothing +def zeroable (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => zeroable(r1) && zeroable(r2) + case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) + //zeroable(r1) || zeroable(r2) + case STAR(_) => false + case NOT(r) => !(zeroable(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGITS = RANGE("0123456789".toList) +val NONZERODIGITS = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) +val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val WHITESPACE = RANGE(" \n".toList) + +val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE) + +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + +println(matcher(NUMBER, "0")) +println(matcher(NUMBER, "01")) +println(matcher(NUMBER, "123450")) + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + +println(matcher(COMMENT, "/* */")) +println(matcher(COMMENT, "/* 34 */")) +println(matcher(COMMENT, "/* foobar comment */")) +println(matcher(COMMENT, "/* test */ test */")) + +// an example list of regular expressions + +abstract class Token + +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case class T_IDENT(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KEYWORD(s: String) extends Token + +val regs: List[Rexp]= List("if", "then", "else", "+", 
IDENT, NUMBER, WHITESPACE) + +type Rule = (Rexp, List[Char] => Token) + +val rules: List[Rule]= + List(("if", (s) => T_KEYWORD(s.mkString)), + ("then", (s) => T_KEYWORD(s.mkString)), + ("else", (s) => T_KEYWORD(s.mkString)), + ("+", (s) => T_OP(s.mkString)), + (IDENT, (s) => T_IDENT(s.mkString)), + (NUMBER, (s) => T_NUM(s.mkString.toInt)), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = +{ println("string " + s) + println(" rexp " + r) + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => { println("1"); None } + case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (zeroable(der (c, r))) => { println("2"); None } + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } +} + +def lex_one (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { + val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def lex_all (rs: List[Rule], s: List[Char]) : List[Token] = s match { + case Nil => Nil + case _ => lex_one(rs, s) match { + case (rest, t) => t :: lex_all(rs, rest) + } +} + + + +println(matcher(COMMENT, "/*ifff if 34 34*/")) +rules.map { (r) => munch(r._1, r._2, "/*ifff if 34 34*/ ".toList, Nil) } +println(lex_all(rules, "ifff if 34 34".toList)) +println(lex_all(rules, " /*ifff if 34 34*/ ".toList)) +println(lex_all(rules, "ifff $ if 34".toList)) + + diff -r e85600529ca5 -r 4794759139ea progs/scraper.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/scraper.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,57 @@ +import java.io.OutputStreamWriter +import java.net.URL +import scala.io.Source.fromInputStream + +val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp") + +//connect to url +val conn = url.openConnection +conn.setRequestProperty("User-Agent", "") +conn.setDoOutput(true) +conn.connect + +//sending data +val wr = new OutputStreamWriter(conn.getOutputStream()) +//wr.write("Fdate=2012-9-24&Tdate=2012-09-25") +//wr.write("Fdate=2012-9-18&Tdate=2012-09-25") +wr.write("Fdate=2001-5-18&Tdate=2012-09-25") +wr.flush +wr.close + +//receiving data +val page = fromInputStream(conn.getInputStream).getLines.mkString("\n") + +//println(page) + +// regular expression . 
excludes newlines, +// therefore we have to use [\S\s] +val regex1 = """[\S\s]*?""".r +val rows = regex1.findAllIn(page).toList + +//print(rows) + +val regex2 = """([\S\s]*?)""".r + +def aux(s: String) : Array[String] = { + for (m <- regex2.findAllIn(s).toArray) yield m match { + case regex2(value) => value.trim + } +} + +val data = rows.map { aux } + +def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt + +//day with highest particle pollution (PM_10) +data.sortWith(compare(1)).last + +//day with highest sulfur dioxide (SO_2) +data.sortWith(compare(2)).last + +//day with highest nitro dioxide (NO_2) +data.sortWith(compare(3)).last + +//days with highest PM_10 +val groups = data.groupBy(_(1).toInt) +val max_key = groups.keySet.max +groups(max_key) diff -r e85600529ca5 -r 4794759139ea progs/while.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,231 @@ +// A parser and evaluator for teh while language +// +import matcher._ +import parser._ + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: 
List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || + (T_KWD("true") ==> ((_) => True)) || + (T_KWD("false") ==> ((_) => False: BExp)) + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + + +// examples +val p1 = "x := 5" +val p1_toks = Tok.fromString(p1) +val p1_ast = Block.parse_all(p1_toks) +println(p1_toks) +println(p1_ast) + +val p1a = "{ x := 5; y := 8}" +val p1a_toks = Tok.fromString(p1a) +val p1a_ast = Block.parse_all(p1a_toks) +println(p1a_ast) + +val p2 = "5 = 6" +val p2_toks = Tok.fromString(p2) +val p2_ast = BExp.parse_all(p2_toks) +println(p2_ast) + +val p2a = "true" +val p2a_toks = Tok.fromString(p2a) +val p2a_ast = BExp.parse_all(p2a_toks) +println(p2a_ast) + +val p3 = "if true then skip else skip" +val p3_toks = Tok.fromString(p3) +val p3_ast = Stmt.parse_all(p3_toks) +println(p3_ast) + +val p3a = "if true then x := 5 else x := 10" +val p3a_toks = Tok.fromString(p3a) +val p3a_ast = Stmt.parse_all(p3a_toks) +println(p3a_ast) + +val p3b = "if false then x := 5 else x := 10" +val p3b_toks = Tok.fromString(p3b) +val p3b_ast = Stmt.parse_all(p3b_toks) +println(p3b_ast) + +// multiplication +val p4 = """{ x := 5; + y := 4; + r := 0; + while y > 0 do { + r := r + x; + y := y - 1 + } + }""" +val p4_toks = Tok.fromString(p4) +val p4_ast = Block.parse_all(p4_toks) +println(p4_ast) + +val p5 = """ + n := 9; + minus1 := 0; + minus2 := 1; + temp := 0; + while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 + }; + fib_res := minus2 +""" +val p5_toks = Tok.fromString(p5) +val p5_ast = Stmts.parse_all(p5_toks) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int 
= a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +//examples +println(eval_stmt(p3a_ast.head, Map.empty)) +println(eval_stmt(p3b_ast.head, Map.empty)) +println(eval_bl(p4_ast.head, Map.empty)) +println(eval_bl(p5_ast.head, Map.empty)) diff -r e85600529ca5 -r 4794759139ea progs/while1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,220 @@ +// A parser and evaluator for the WHILE language +// +import matcher._ +import parser._ + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + 
} +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int = a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env + case Write(x) => { println(env(x)); env } +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +def eval_prog(name: String) : Env = { + val tks = Tok.fromFile(name) + val ast = Stmts.parse_single(tks) + eval_bl(ast, Map.empty) +} + + +//examples + +//eval_prog("loops.while") +eval_prog("fib.while") + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j 
<- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} +""" + + + +def eval_test(n: Int) : Unit = { + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + println(n + " " + time_needed(2, eval_bl(ast, Map.empty))) +} + +List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_)) + + + + + + + diff -r e85600529ca5 -r 4794759139ea proof.pdf Binary file proof.pdf has changed diff -r e85600529ca5 -r 4794759139ea proof.tex --- a/proof.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions -\begin{document} - -\section*{Proof} - -Recall the definitions for regular expressions and the language associated with a regular expression: - -\begin{center} -\begin{tabular}{c} -\begin{tabular}[t]{rcl} - $r$ & $::=$ & $\varnothing$ \\ - & $\mid$ & $\epsilon$ \\ - & $\mid$ & $c$ \\ - & $\mid$ & $r_1 \cdot r_2$ \\ - & $\mid$ & $r_1 + r_2$ \\ - & $\mid$ & $r^*$ \\ - \end{tabular}\hspace{10mm} -\begin{tabular}[t]{r@{\hspace{1mm}}c@{\hspace{1mm}}l} -$L(\varnothing)$ & $\dn$ & $\varnothing$ \\ -$L(\epsilon)$ & $\dn$ & $\{\texttt{""}\}$ \\ -$L(c)$ & $\dn$ & $\{\texttt{"}c\texttt{"}\}$ \\ -$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ \\ -$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ \\ - $L(r^*)$ & $\dn$ & $\bigcup_{n\ge 0} L(r)^n$ \\ - \end{tabular} -\end{tabular} -\end{center} - -\noindent -We also defined the notion of a derivative of a regular expression (the derivative with respect to a character): - -\begin{center} -\begin{tabular}{lcl} - $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$ \\ - $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\ - $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\ - $der\, c\, (r_1 + r_2)$ & $\dn$ & $(der\, c\, r_1) + (der\, c\, r_2)$ \\ - $der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable(r_1)$\\ - & & then $((der\, c\, r_1) \cdot r_2) + (der\, c\, r_2)$\\ - & & else $(der\, c\, r_1) \cdot r_2$\\ - $der\, c\, (r^*)$ & $\dn$ & $(der\, c\, r) \cdot (r^*)$\\ - \end{tabular} -\end{center} - -\noindent -With our definition of regular expressions comes an induction principle. Given a property $P$ over -regular expressions. We can establish that $\forall r.\; P(r)$ holds, provided we can show the following: - -\begin{enumerate} -\item $P(\varnothing)$, $P(\epsilon)$ and $P(c)$ all hold, -\item $P(r_1 + r_2)$ holds under the induction hypotheses that -$P(r_1)$ and $P(r_2)$ hold, -\item $P(r_1 \cdot r_2)$ holds under the induction hypotheses that -$P(r_1)$ and $P(r_2)$ hold, and -\item $P(r^*)$ holds under the induction hypothesis that $P(r)$ holds. -\end{enumerate} - -\noindent -Let us try out an induction proof. Recall the definition - -\begin{center} -$Der\, c\, A \dn \{ s\;\mid\; c\!::\!s \in A\}$ -\end{center} - -\noindent -whereby $A$ is a set of strings. We like to prove - -\begin{center} -\begin{tabular}{l} -$P(r) \dn $ \hspace{4mm} $L(der\,c\,r) = Der\,c\,(L(r))$ -\end{tabular} -\end{center} - -\noindent -by induction over the regular expression $r$. 
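-
-\noindent
-As a small sanity check of this claim (a worked instance only, using nothing but the
-definitions above): take $r = a + b$ and $c = a$. Then
-
-\begin{center}
-\begin{tabular}{lcl}
-$L(der\,a\,(a + b))$ & $=$ & $L(\epsilon + \varnothing) = \{\texttt{""}\} \cup \varnothing = \{\texttt{""}\}$\\
-$Der\,a\,(L(a + b))$ & $=$ & $Der\,a\,\{\texttt{"a"}, \texttt{"b"}\} = \{\texttt{""}\}$
-\end{tabular}
-\end{center}
-
-\noindent
-so both sides coincide on this instance. The proof below establishes the equation for all $r$.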
- - -\newpage -\noindent -{\bf Proof} - -\noindent -According to 1.~above we need to prove $P(\varnothing)$, $P(\epsilon)$ and $P(d)$. Lets do this in turn. - -\begin{itemize} -\item First Case: $P(\varnothing)$ is $L(der\,c\,\varnothing) = Der\,c\,(L(\varnothing))$ (a). We have $der\,c\,\varnothing = \varnothing$ -and $L(\varnothing) = \varnothing$. We also have $Der\,c\,\varnothing = \varnothing$. Hence we have $\varnothing = \varnothing$ in (a). - -\item Second Case: $P(\epsilon)$ is $L(der\,c\,\epsilon) = Der\,c\,(L(\epsilon))$ (b). We have $der\,c\,\epsilon = \varnothing$, -$L(\varnothing) = \varnothing$ and $L(\epsilon) = \{\texttt{""}\}$. We also have $Der\,c\,\{\texttt{""}\} = \varnothing$. Hence we have -$\varnothing = \varnothing$ in (b). - -\item Third Case: $P(d)$ is $L(der\,c\,d) = Der\,c\,(L(d))$ (c). We need to treat the cases $d = c$ and $d \not= c$. - -$d = c$: We have $der\,c\,c = \epsilon$ and $L(\epsilon) = \{\texttt{""}\}$. -We also have $L(c) = \{\texttt{"}c\texttt{"}\}$ and $Der\,c\,\{\texttt{"}c\texttt{"}\} = \{\texttt{""}\}$. Hence we have -$\{\texttt{""}\} = \{\texttt{""}\}$ in (c). - -$d \not=c$: We have $der\,c\,d = \varnothing$. -We also have $Der\,c\,\{\texttt{"}d\texttt{"}\} = \varnothing$. Hence we have -$\varnothing = \varnothing$ in (c). -\end{itemize} - -\noindent -These were the easy base cases. Now come the inductive cases. - -\begin{itemize} -\item Fourth Case: $P(r_1 + r_2)$ is $L(der\,c\,(r_1 + r_2)) = Der\,c\,(L(r_1 + r_2))$ (d). This is what we have to show. -We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ -$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) -\end{tabular} -\end{center} - -We have that $der\,c\,(r_1 + r_2) = (der\,c\,r_1) + (der\,c\,r_2)$ and also $L((der\,c\,r_1) + (der\,c\,r_2)) = L(der\,c\,r_1) \cup L(der\,c\,r_2)$. -By (I) and (II) we know that the left-hand side is $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$. You need to ponder a bit, but you should see -that - -\begin{center} -$Der\,c(A \cup B) = (Der\,c\,A) \cup (Der\,c\,B)$ -\end{center} - -holds for every set of strings $A$ and $B$. That means the right-hand side of (d) is also $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$, -because $L(r_1 + r_2) = L(r_1) \cup L(r_2)$. And we are done with the fourth case. - -\item Fifth Case: $P(r_1 \cdot r_2)$ is $L(der\,c\,(r_1 \cdot r_2)) = Der\,c\,(L(r_1 \cdot r_2))$ (e). We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ -$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) -\end{tabular} -\end{center} - -Let us first consider the case where $nullable(r_1)$ holds. Then - -\[ -der\,c\,(r_1 \cdot r_2) = ((der\,c\,r_1) \cdot r_2) + (der\,c\,r_2). -\] - -The corresponding language of the right-hand side is - -\[ -(L(der\,c\,r_1) \,@\, L(r_2)) \cup L(der\,c\,r_2). -\] - -By the induction hypotheses (I) and (II), this is equal to - -\[ -(Der\,c\,(L(r_1)) \,@\, L(r_2)) \cup (Der\,c\,(L(r_2)).\;\;(**) -\] - -We also know that $L(r_1 \cdot r_2) = L(r_1) \,@\,L(r_2)$. We have to know what -$Der\,c\,(L(r_1) \,@\,L(r_2))$ is. - -Let us analyse what -$Der\,c\,(A \,@\, B)$ is for arbitrary sets of strings $A$ and $B$. If $A$ does \emph{not} -contain the empty string, then every string in $A\,@\,B$ is of the form $s_1 \,@\, s_2$ where -$s_1 \in A$ and $s_2 \in B$. So if $s_1$ starts with $c$ then we just have to remove it. Consequently, -$Der\,c\,(A \,@\, B) = (Der\,c\,(A)) \,@\, B$. 
This case does not apply here though, because we already -proved that if $r_1$ is nullable, then $L(r_1)$ contains the empty string. In this case, every string -in $A\,@\,B$ is either of the form $s_1 \,@\, s_2$, with $s_1 \in A$ and $s_2 \in B$, or -$s_3$ with $s_3 \in B$. This means $Der\,c\,(A \,@\, B) = ((Der\,c\,(A)) \,@\, B) \cup Der\,c\,B$. -But this proves that (**) is $Der\,c\,(L(r_1) \,@\, L(r_2))$. - -Similarly in the case where $r_1$ is \emph{not} nullable. - -\item Sixth Case: $P(r^*)$ is $L(der\,c\,(r^*)) = Der\,c\,L(r^*)$. We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r)$: & $L(der\,c\,r) = Der\,c\,(L(r))$ (I) -\end{tabular} -\end{center} - -We have $der\,c\,(r^*) = der\,c\,r\cdot r^*$. Which means $L(der\,c\,(r^*)) = L(der\,c\,r\cdot r^*)$ and -further $L(der\,c\,r) \,@\, L(r^*)$. By induction hypothesis (I) we know that is equal to -$(Der\,c\,L(r)) \,@\, L(r^*)$. (*) - -\end{itemize} - - - - -Let us now analyse $Der\,c\,L(r^*)$, which is equal to $Der\,c\,((L(r))^*)$. Now $(L(r))^*$ is defined -as $\bigcup_{n \ge 0} L(r)$. We can write this as $L(r)^0 \cup \bigcup_{n \ge 1} L(r)$, where we just -separated the first union and then let the ``big-union'' start from $1$. Form this we can already infer - -\begin{center} -$Der\,c\,(L(r^*)) = Der\,c\,(L(r)^0 \cup \bigcup_{n \ge 1} L(r)) = (Der\,c\,L(r)^0) \cup Der\,c\,(\bigcup_{n \ge 1} L(r))$ -\end{center} - -The first union ``disappears'' since $Der\,c\,(L(r)^0) = \varnothing$. - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r e85600529ca5 -r 4794759139ea re-internal.rb --- a/re-internal.rb Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -# provided by Daniel Baldwin - -nums = (1..100) - -#iterate through the nums 1-100 -nums.each do |i| - - start_time = Time.now - string = "a" * i - - #create a new regular expression based on current value of i - re = Regexp.new(/((a?){#{i}})(a{#{i}})/) - - re.match(string) - #if re.match(string) - # puts "matched string a * #{i} with regex #{re}" - #else - # puts "unmatched string a * #{i} with regex #{re}" - #end - - puts "#{i} %.5f" % (Time.now - start_time) -end diff -r e85600529ca5 -r 4794759139ea re.py --- a/re.py Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#!/usr/bin/env python -import re -import sys - -cn = sys.argv[1] - -r1 = '((a?){%s})' % cn -r2 = 'a{%s}' % cn - -m = re.match(r1 + r2 , "a" * int(cn)) - -print m.group(0) diff -r e85600529ca5 -r 4794759139ea scala/S_grammar-token.scala --- a/scala/S_grammar-token.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -//:load matcher.scala -//:load parser3.scala - -abstract class Token -case object T_ONE extends Token - -val lexing_rules : List[Rule[Token]] = - List(("1", (s: List[Char]) => T_ONE)) - -val T = Tokenizer(lexing_rules) - -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tokparser(t: Token) = TokParser(t) - -case object EmpParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = Set(("", ts)) -} - - -lazy val Su: Parser[List[Token], String] = - (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser - - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i 
* 1.0e9) -} - -def test(i: Int) = { - val result = Su.parse_all(T.fromString("1" * i)) - //print(result.size + " ") -} - - -for (i <- 1 to 1000 by 50) { - print(i + " ") - print("%.5f".format(time_needed(1, test(i)))) - print("\n") -} - diff -r e85600529ca5 -r 4794759139ea scala/S_grammar.scala --- a/scala/S_grammar.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -//:load parser3.scala - -case class StringParser(s: String) extends Parser[String, String] { - def parse(ts: String) = { - if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length))) - else Set() - } -} - -implicit def string2parser(s: String) = StringParser(s) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// unambiguous grammar - -lazy val U: Parser[String, String] = - ("1" ~ U) ==> { case (x, y) => "1" + y} || "" - -def test1(i: Int) = { - val result = U.parse_all("1" * i) - //print(result.size + " ") -} - -for (i <- 1 to 1000 by 50) { - print(i + " ") - print("%.5f".format(time_needed(1, test1(i)))) - print("\n") -} - - - -// ambiguous grammar -// n = 16 -> over 35 million parse trees - -lazy val S: Parser[String, String] = - ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "" - -def test2(i: Int) = { - val result = S.parse_all("1" * i) - print(result.size + " ") -} - -for (i <- 1 to 30) { - print(i + " ") - print("%.5f".format(time_needed(1, test2(i)))) - print("\n") -} diff -r e85600529ca5 -r 4794759139ea scala/Term_grammar.scala --- a/scala/Term_grammar.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -//:load matcher.scala -//:load parser3.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been lexed -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T -lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F -lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser - -println(E.parse_all(Tk.fromString("1 + 2 + 3"))) -println(E.parse_all(Tk.fromString("1 + 2 * 3"))) -println(E.parse_all(Tk.fromString("(1 + 2) 
* 3"))) -println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)"))) - diff -r e85600529ca5 -r 4794759139ea scala/app0.scala --- a/scala/app0.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -import io.Source - -def get_page(url: String) : String = { - Source.fromURL(url).take(10000).mkString - - diff -r e85600529ca5 -r 4794759139ea scala/app1.scala --- a/scala/app1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - diff -r e85600529ca5 -r 4794759139ea scala/app2.scala --- a/scala/app2.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -val http_pattern = """\"https?://[^\"]*\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - diff -r e85600529ca5 -r 4794759139ea scala/app3.scala --- a/scala/app3.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -val my_urls = """urbanc""".r - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} diff -r e85600529ca5 -r 4794759139ea scala/app4.scala --- a/scala/app4.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r -val email_pattern = - """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - for (u <- get_all_URLs(page)) crawl(u, n - 1) - } -} diff -r e85600529ca5 -r 4794759139ea scala/app5.scala --- a/scala/app5.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} diff -r e85600529ca5 -r 4794759139ea scala/app51.scala --- a/scala/app51.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp diff -r e85600529ca5 -r 4794759139ea scala/app6.scala --- a/scala/app6.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -def deriv (r: Rexp, c: Char) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) - else SEQ(deriv(r1, c), 
r2) - case STAR(r) => SEQ(deriv(r, c), STAR(r)) -} - diff -r e85600529ca5 -r 4794759139ea scala/app7.scala --- a/scala/app7.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -abstract class Parser[I, T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) - yield head - - def || (right : => Parser[I, T]) : Parser[I, T] = - new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = - new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = - new SeqParser(this, right) -} - - diff -r e85600529ca5 -r 4794759139ea scala/app8.scala --- a/scala/app8.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S]) - extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) - yield ((head1, head2), tail2) -} - -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T]) - extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I, T, S](p: => Parser[I, T], f: T => S) - extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) - yield (f(head), tail) -} - - diff -r e85600529ca5 -r 4794759139ea scala/automata.scala --- a/scala/automata.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - -// a class for deterministic finite automata, -// the type of states is kept polymorphic - -case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { - - // the transition function lifted to list of characters - def deltas(q: A, cs: List[Char]) : Either[A, String] = - if (states.contains(q)) cs match { - case Nil => Left(q) - case c::cs => - if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) - else Right(q + " does not have a transition for " + c) - } - else Right(q + " is not a state of the automaton") - - // wether a string is accepted by the automaton - def accepts(s: String) = deltas(start, s.toList) match { - case Left(q) => fins.contains(q) - case _ => false - } -} - - -// translating a regular expression into a finite -// automaton - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -implicit def string2rexp(s : String) = { - def chars2rexp (cs: List[Char]) : Rexp = cs match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::cs => SEQ(CHAR(c), chars2rexp(cs)) - } - chars2rexp(s.toList) -} - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def der (r: Rexp, c: Char) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) - case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) - else SEQ(der(r1, c), r2) - case STAR(r) => SEQ(der(r, c), STAR(r)) -} - - -// Here we construct an automaton whose -// states are regular expressions -type State = Rexp -type States = Set[State] -type Transition = Map[(State, Char), State] - -// we 
use as an alphabet all lowercase letters -val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet - -def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { - val q_der : State = der(q, c) - if (qs.contains(q_der)) (qs, delta + ((q, c) -> q)) - else explore(qs + q_der, delta + ((q, c) -> q_der), q_der) -} - -def explore (qs: States, delta: Transition, q: State) : (States, Transition) = - alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) - - -def mk_automaton (r: Rexp) : Automaton[Rexp] = { - val (qs, delta) = explore(Set(r), Map(), r); - val fins = for (q <- qs if nullable(q)) yield q; - Automaton[Rexp](r, qs, delta, fins) -} - -val A = mk_automaton(ALT("ab","ac")) - -A.start -A.states.toList.length - -println(A.accepts("bd")) -println(A.accepts("ab")) -println(A.accepts("ac")) - -val r1 = STAR(ALT("a","b")) -val r2 = SEQ("b","b") -val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a") -val B = mk_automaton(r3) - -B.start -B.states.toList.length diff -r e85600529ca5 -r 4794759139ea scala/automata1.scala --- a/scala/automata1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,95 +0,0 @@ - -// a class for deterministic finite automata, -// the type of states is kept polymorphic - -case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { - - // the transition function lifted to list of characters - def deltas(q: A, cs: List[Char]) : A = - if (states.contains(q)) cs match { - case Nil => q - case c::cs => - if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) - else throw new RuntimeException(q + " does not have a transition for " + c) - } - else throw new RuntimeException(q + " is not a state of the automaton") - - // wether a string is accepted by the automaton - def accepts(s: String) = - try { - fins.contains(deltas(start, s.toList)) - } catch { - case e:RuntimeException => false - } -} - - - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -implicit def string2rexp(s : String) = { - def chars2rexp (cs: List[Char]) : Rexp = cs match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::cs => SEQ(CHAR(c), chars2rexp(cs)) - } - chars2rexp(s.toList) -} - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def der (r: Rexp, c: Char) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) - case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) - else SEQ(der(r1, c), r2) - case STAR(r) => SEQ(der(r, c), STAR(r)) -} - - -// Here we construct an automaton whose -// states are regular expressions -type State = Rexp -type States = Set[State] -type Transition = Map[(State, Char), State] - -def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { - val qc : State = der(q, c) - if (qs.contains(qc)) (qs, delta + ((q, c) -> q)) - else explore(qs + qc, delta + ((q, c) -> qc), qc) -} - -// we use as alphabet all lowercase letters -val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet - -def explore (qs: States, delta: Transition, q: State) 
: (States, Transition) = - alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) - - -def mk_automaton (r: Rexp) : Automaton[Rexp] = { - val (qs, delta) = explore(Set(r), Map(), r); - val fins = for (q <- qs if nullable(q)) yield q; - Automaton[Rexp](r, qs, delta, fins) -} - -val A = mk_automaton(ALT("ab","ac")) - -println(A.accepts("bd")) -println(A.accepts("ab")) -println(A.accepts("ac")) diff -r e85600529ca5 -r 4794759139ea scala/compile.scala --- a/scala/compile.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,326 +0,0 @@ -// A parser and evaluator for teh while language -// -import matcher._ -import parser._ - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Relop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: 
Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// compiler -val beginning = """ -.class public XXX.XXX -.super java/lang/Object - -.method public ()V - aload_0 - invokenonvirtual java/lang/Object/()V - return -.end method - -.method public static write(I)V - .limit locals 5 - .limit stack 5 - iload 0 - getstatic java/lang/System/out Ljava/io/PrintStream; - swap - invokevirtual java/io/PrintStream/println(I)V - return -.end method - - -.method public static main([Ljava/lang/String;)V - .limit locals 200 - .limit stack 200 - -""" - -val ending = """ - - return - -.end method -""" - -// for generating new labels -var counter = -1 - -def Fresh(x: String) = { - counter += 1 - x ++ "_" ++ counter.toString() -} - -type Env = Map[String, String] -type Instrs = List[String] - -def compile_aexp(a: AExp, env : Env) : Instrs = a match { - case Num(i) => List("ldc " + i.toString + "\n") - case Var(s) => List("iload " + env(s) + "\n") - case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") - case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") - case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") -} - -def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { - case True => Nil - case False => List("goto " + jmp + "\n") - case Relop("=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") - case Relop("!=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") - case Relop("<", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") -} - - -def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { - case Skip => (Nil, env) - case Assign(x, a) => { - val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString - (compile_aexp(a, env) ++ - 
List("istore " + index + "\n"), env + (x -> index)) - } - case If(b, bl1, bl2) => { - val if_else = Fresh("If_else") - val if_end = Fresh("If_end") - val (instrs1, env1) = compile_bl(bl1, env) - val (instrs2, env2) = compile_bl(bl2, env1) - (compile_bexp(b, env, if_else) ++ - instrs1 ++ - List("goto " + if_end + "\n") ++ - List("\n" + if_else + ":\n\n") ++ - instrs2 ++ - List("\n" + if_end + ":\n\n"), env2) - } - case While(b, bl) => { - val loop_begin = Fresh("Loop_begin") - val loop_end = Fresh("Loop_end") - val (instrs1, env1) = compile_bl(bl, env) - (List("\n" + loop_begin + ":\n\n") ++ - compile_bexp(b, env, loop_end) ++ - instrs1 ++ - List("goto " + loop_begin + "\n") ++ - List("\n" + loop_end + ":\n\n"), env1) - } - case Write(x) => - (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) -} - -def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { - case Nil => (Nil, env) - case s::bl => { - val (instrs1, env1) = compile_stmt(s, env) - val (instrs2, env2) = compile_bl(bl, env1) - (instrs1 ++ instrs2, env2) - } -} - -def compile(input: String) : String = { - val class_name = input.split('.')(0) - val tks = Tok.fromFile(input) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) -} - - -def compile_to(input: String, output: String) = { - val fw = new java.io.FileWriter(output) - fw.write(compile(input)) - fw.close() -} - -// -val tks = Tok.fromString("x := x + 1") -val ast = Stmt.parse_single(tks) -println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) - - - -//examples - -compile_to("loops.while", "loops.j") -//compile_to("fib.while", "fib.j") - - -// testing cases for time measurements - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// for testing -import scala.sys.process._ - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - - -def compile_test(n: Int) : Unit = { - val class_name = "LOOP" - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) - val fw = new java.io.FileWriter(class_name + ".j") - fw.write(assembly) - fw.close() - val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! 
- println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) -} - -List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) - - - -// Javabyte code assmbler -// -// java -jar jvm/jasmin-2.4/jasmin.jar loops.j - - - - - - diff -r e85600529ca5 -r 4794759139ea scala/crawler.scala --- a/scala/crawler.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// non-existing page -> returns the empty string -get_page("""http://www.foobar.com""") - - -// staring URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// starts with an " -// then either http or https -// then :// -// then any character that is not " -// finally " -val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r -val http_pattern = """\"(https?://[^\"]*)\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// get all urls in startURL -get_all_URLs(get_page(startURL)) - -// number of all urls in startURL -get_all_URLs(get_page(startURL)).toList.length - - -// naive version - seraches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -crawl(startURL, 2) - - -//breadth-first version without visiting -//pages twice -def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else { - println("Visiting: " + n + " " + url) - get_all_URLs(get_page(url)) - } - } - } - bf_crawl(new_todo, visited union todo, n - 1) - } -} - -bf_crawl(Set(startURL1), Set(), 2) - - -//breadth-first version without visiting -//pages twice and only in "my" domain -val my_pattern = """urbanc""".r - -// breadth first search avoiding double searches -def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else if (my_pattern.findFirstIn(url) == None) Set[String]() - else { - println("Visiting: " + n + " " + url); - get_all_URLs(get_page(url)) - } - } - } - bf_crawl2(new_todo, visited union todo, n - 1) - } -} - -bf_crawl2(Set(startURL1), Set(), 5) - -// email harvester -// from -// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ - -val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else { - println("Visiting: " + n + " " + url); - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - get_all_URLs(get_page(url)) - } - } - } - bf_crawl3(new_todo, visited union todo, n - 1) - } -} - -bf_crawl3(Set(startURL1), Set(), 3) - - -// depth-first version does not work, -// because it might visit pages at depth 1 -// while it still wants to visit them at -// 
depth 2 -var visited = Set("") - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (visited.contains(url)) () //println("Already visited: " + n + " " + url) - else { - println("Visiting: " + n + " " + url); - visited += url - for (u <- getAllURLs(getURLpage(url))) crawl(u, n - 1); - } -} diff -r e85600529ca5 -r 4794759139ea scala/crawler1.scala --- a/scala/crawler1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - seraches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -// staring URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" -//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/""" - - -// call on the command line -crawl(startURL, 2) - -crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2) diff -r e85600529ca5 -r 4794759139ea scala/crawler2.scala --- a/scala/crawler2.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// staring URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - seraches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -// can now deal with depth 3 -// start on command line -crawl(startURL, 4) - diff -r e85600529ca5 -r 4794759139ea scala/crawler3.scala --- a/scala/crawler3.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// staring URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r -val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ - -def unquote(s: String) = 
s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - seraches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - //else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - for (u <- get_all_URLs(page)) crawl(u, n - 1) - } -} - -// can now deal with depth 3 -// start on command line -crawl(startURL, 3) - diff -r e85600529ca5 -r 4794759139ea scala/html.scala --- a/scala/html.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ - -//:load matcher.scala - -// some regular expressions -val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""") -val WORD = PLUS(SYM) - -val BTAG = SEQS("<", WORD, ">") -val ETAG = SEQS("") - -val WHITESPACE = PLUS(RANGE(" \n")) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_WORD(s: String) extends Token -case class T_ETAG(s: String) extends Token -case class T_BTAG(s: String) extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -val lexing_rules: List[Rule[Token]] = - List((BTAG, (s) => T_BTAG(s.mkString)), - (ETAG, (s) => T_ETAG(s.mkString)), - (WORD, (s) => T_WORD(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE)) - -// the tokenizer -val T = Tokenizer(lexing_rules) - -// width for printing -val WIDTH = 60 - - -def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match { - case Nil => println(Console.RESET) - case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr) - case T_WORD(s)::rest => { - val newstr = Console.RESET + ctr.reverse.mkString + s - if (c + s.length < WIDTH) { - print(newstr); - interpret(rest, c + s.length, ctr) - } - else { - print("\n" + newstr) - interpret(rest, s.length, ctr) - } - } - case T_BTAG("
<p>")::rest => print("\n"); interpret(rest, 0, ctr)
- case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
- case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
- case T_BTAG("<u>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
- case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
- case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
- case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
- case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
- case _::rest => interpret(rest, c, ctr)
-}
-
-val test_string = """
-MSc Projects
-

-start of paragraph. a cyan word normal again something longer. -

- - -

Description: - Regular expressions are extremely useful for many text-processing tasks such as finding patterns in texts, - lexing programs, syntax highlighting and so on. Given that regular expressions were - introduced in 1950 by Stephen Kleene, you might think - regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still - an active research area. For example - this paper - about regular expression matching and partial derivatives was presented this summer at the international - PPDP'12 conference. The task in this project is to implement the results from this paper.

- -

The background for this project is that some regular expressions are - evil - and can stab you in the back; according to - this blog post. - For example, if you use in Python or - in Ruby (probably also in other mainstream programming languages) the - innocently looking regular expression a?{28}a{28} and match it, say, against the string - aaaaaaaaaaaaaaaaaaaaaaaaaaaa (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact, - Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself: - re.py (Python version) and - re.rb - (Ruby version). You can imagine an attacker - mounting a nice DoS attack against - your program if it contains such an evil regular expression. Actually - Scala (and also Java) are almost immune from such - attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale - the regular expression and string further to, say, 4,600 as, then you get a - StackOverflowError - potentially crashing your program. -

-""" - -interpret(T.fromString(test_string), 0, Nil) diff -r e85600529ca5 -r 4794759139ea scala/html1.scala --- a/scala/html1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ - - -//:load matcher.scala - - -// some regular expressions -val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""") -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER))) -val BTAG = SEQS("<", NAME, ">") -val ETAG = SEQS("") - -val WORD = PLUS(ALT(LETTER, DIGIT)) -val WHITESPACE = PLUS(RANGE(" \n")) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_WORD(s: String) extends Token -case class T_ETAG(s: String) extends Token -case class T_BTAG(s: String) extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = - tokenize(rs, s.toList) - - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((BTAG, (s) => T_BTAG(s.mkString)), - (ETAG, (s) => T_ETAG(s.mkString)), - (WORD, (s) => T_WORD(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE)) - -val ts = tokenize_file(lexing_rules, "test.html") - - -val WIDTH = 60 - -def is_tag(t: Token) = t match { - case T_BTAG(_) => true - case T_ETAG(_) => true - case _ => false -} - -def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match { - case Nil => println(Console.RESET) - case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr) - case T_WORD(s)::rest => { - val newc = c + s.length - val newstr = Console.RESET + ctr.reverse.mkString + s - if (newc < WIDTH) { - print(newstr); - interpret(rest, newc, ctr) - } - else { - print("\n" + newstr) - interpret(rest, s.length, ctr) - } - } - case T_BTAG("
<p>")::rest => print("\n"); interpret(rest, 0, ctr)
- case T_ETAG("</p>
")::rest => print("\n"); interpret(rest, 0, ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.BOLD :: ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.CYAN :: ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.RED :: ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.BLINK :: ctr) - case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) - case _::rest => interpret(rest, c, ctr) -} - -interpret(ts, 0, Nil) diff -r e85600529ca5 -r 4794759139ea scala/i.scala --- a/scala/i.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,492 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLC extends Rexp // recognises any character -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp // negation of a regular expression - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLC => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// regular expression for specifying -// ranges of characters -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// many alternatives -def Alts(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => ALT(r, Alts(rs)) -} -def ALTS(rs: Rexp*) = Alts(rs.toList) - -// repetitions -def Seqs(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => SEQ(r, Seqs(rs)) -} -def SEQS(rs: Rexp*) = Seqs(rs.toList) - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -type Rule[T] = (Rexp, List[Char] => T) - -case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { - - def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - - def one_token(s: List[Char]) : Either[(List[Char], T), String] = { - val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten - if (somes == Nil) Right(s.mkString) - else Left(somes sortBy (_._1.length) head) - } - - def tokenize(cs: List[Char]) : List[T] = cs match { - case Nil => Nil - case _ => one_token(cs) match { - case Left((rest, token)) => token :: tokenize(rest) - case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } - } - } - - def fromString(s: String) : List[T] = - tokenize(s.toList).filterNot(excl.contains(_)) - - def fromFile(name: String) : List[T] = - fromString(io.Source.fromFile(name).mkString) - -} - - -// parser combinators with input type I and return type T - -abstract class Parser[I <% Seq[_], T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head - - def parse_single(ts: I) : T = parse_all(ts).toList match { - case t::Nil => t - case _ => { println ("Parse Error") ; sys.exit(-1) } - } - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, 
T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - - -// A parser and evaluator for teh while language -// -//:load matcher.scala -//:load parser3.scala - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[Rule[Token]] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Relop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) 
|| - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// compiler -val beginning = """ -.class public XXX.XXX -.super java/lang/Object - -.method public ()V - aload_0 - invokenonvirtual java/lang/Object/()V - return -.end method - -.method public static write(I)V - .limit locals 5 - .limit stack 5 - iload 0 - getstatic java/lang/System/out Ljava/io/PrintStream; - swap - invokevirtual java/io/PrintStream/println(I)V - return -.end method - - -.method public static main([Ljava/lang/String;)V - .limit locals 200 - .limit stack 200 - -""" - -val ending = """ - - return - -.end method -""" - -// for generating new labels -var counter = -1 - -def Fresh(x: String) = { - counter += 1 - x ++ "_" ++ counter.toString() -} - -type Env = Map[String, String] -type Instrs = List[String] - -def compile_aexp(a: AExp, env : Env) : Instrs = a match { - case Num(i) => List("ldc " + i.toString + "\n") - case Var(s) => List("iload " + env(s) + "\n") - case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") - case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") - case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") -} - -def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { - case True => Nil - case False => List("goto " + jmp + "\n") - case Relop("=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") - case Relop("!=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") - case Relop("<", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") -} - - -def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { - case Skip => (Nil, env) - case Assign(x, a) => { - val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString - (compile_aexp(a, env) ++ - List("istore " + index + "\n"), env + (x -> index)) - } - case If(b, bl1, bl2) => { - val if_else = Fresh("If_else") - val if_end = Fresh("If_end") - val (instrs1, env1) = compile_bl(bl1, env) - val (instrs2, env2) = compile_bl(bl2, env1) - (compile_bexp(b, env, if_else) ++ - instrs1 ++ - List("goto " + if_end + "\n") ++ - List("\n" + if_else + ":\n\n") ++ - instrs2 ++ - 
List("\n" + if_end + ":\n\n"), env2) - } - case While(b, bl) => { - val loop_begin = Fresh("Loop_begin") - val loop_end = Fresh("Loop_end") - val (instrs1, env1) = compile_bl(bl, env) - (List("\n" + loop_begin + ":\n\n") ++ - compile_bexp(b, env, loop_end) ++ - instrs1 ++ - List("goto " + loop_begin + "\n") ++ - List("\n" + loop_end + ":\n\n"), env1) - } - case Write(x) => - (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) -} - -def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { - case Nil => (Nil, env) - case s::bl => { - val (instrs1, env1) = compile_stmt(s, env) - val (instrs2, env2) = compile_bl(bl, env1) - (instrs1 ++ instrs2, env2) - } -} - -def compile(input: String) : String = { - val class_name = input.split('.')(0) - val tks = Tok.fromFile(input) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) -} - - -def compile_to(input: String, output: String) = { - val fw = new java.io.FileWriter(output) - fw.write(compile(input)) - fw.close() -} - -// -val tks = Tok.fromString("x := x + 1") -val ast = Stmt.parse_single(tks) -println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) - - - -//examples - -compile_to("loops.while", "loops.j") -//compile_to("fib.while", "fib.j") - - -// testing cases for time measurements -/* -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// for testing -import scala.sys.process._ - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - - -def compile_test(n: Int) : Unit = { - val class_name = "LOOP" - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) - val fw = new java.io.FileWriter(class_name + ".j") - fw.write(assembly) - fw.close() - val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! 
- println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) -} - -List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) - - - -// javabyte code assmbler -// -// java -jar jvm/jasmin-2.4/jasmin.jar loops.j - -*/ - - - - - diff -r e85600529ca5 -r 4794759139ea scala/matcher.scala --- a/scala/matcher.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -package object matcher { - -// regular expressions -// including constructors for NOT and ALLC -sealed abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLC extends Rexp // recognises any character -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp // negation of a regular expression - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLC => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// main class for the tokenizer -case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) { - -def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - -def one_token(s: List[Char]) : Either[(List[Char], T), String] = { - val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten - if (somes == Nil) Right(s.mkString) - else Left(somes sortBy (_._1.length) head) -} - -def tokenize(cs: List[Char]) : List[T] = cs match { - case Nil => Nil - case _ => one_token(cs) match { - case Left((rest, token)) => token :: tokenize(rest) - case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } - } -} - -def fromString(s: String) : List[T] = - tokenize(s.toList).filterNot(excl.contains(_)) - -def fromFile(name: String) : List[T] = - fromString(io.Source.fromFile(name).mkString) - -} - - -// regular expression for specifying -// ranges of characters -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// many alternatives -def Alts(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => ALT(r, Alts(rs)) -} -def ALTS(rs: Rexp*) = Alts(rs.toList) - -// repetitions -def Seqs(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => SEQ(r, Seqs(rs)) -} -def SEQS(rs: Rexp*) = Seqs(rs.toList) - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -} diff -r e85600529ca5 -r 4794759139ea scala/mllex.scala --- a/scala/mllex.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -:load matcher.scala - - -// some regular expressions -val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", - "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", - "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", - "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", - "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", - "sharing", "sig", "signature", "struct", "structure", "then", "type", - "val", "where", "while", "with", "withtype")) - -val DIGITS = RANGE("0123456789") -val NONZERODIGITS = RANGE("123456789") - -val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") -val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES) - -val ALL = ALTS(KEYWORDS, INTEGERS) - -val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/") - - - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') 
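// A small usage sketch, assuming the definitions from matcher.scala are in
// scope as the :load at the top of this file intends: a string is accepted by
// a regular expression exactly when the derivative w.r.t. all its characters
// is nullable. The helper name `accepts` is only for this illustration and is
// not part of the lexer itself.
def accepts(r: Rexp, s: String) : Boolean =
  nullable(s.toList.foldLeft(r)((r1, c) => der(c, r1)))

// expected: true, true, false (POSITIVES rules out leading zeros)
println(accepts(INTEGERS, "42"))
println(accepts(INTEGERS, "~7"))
println(accepts(INTEGERS, "007"))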
-val WHITESPACE = PLUS(RANGE(" \n".toList)) -val OPS = RANGE("+-*".toList) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_NUM extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = - tokenize(rs, s.toList).filterNot(_ match { - case T_WHITESPACE => true - case _ => false - }) - - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -tokenize_file(Nil, "nominal_library.ML") - - - - -type Grammar = List[(String, List[Token])] - -// grammar for arithmetic expressions -val grammar = - List ("E" -> List(T_NUM), - "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)), - "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)), - "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)), - "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN)) - -def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match { - case (_, Nil) => true - case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) - case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) - case _ => false -} - -def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = - ts1 match { - case Nil => None - case t::ts => - if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) - else chop(ts, prefix, t::ts2) - } - -// examples -chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) -chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) - -def replace[A](ts: List[A], out: List[A], in: List [A]) = - chop(ts, out, Nil) match { - case None => None - case Some((before, after)) => Some(before ::: in ::: after) - } - -def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match { - case List(T_NT("E", tree)) => { println(tree); true } - case _ => { - val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) - tss.flatten.exists(parse1(g, _)) - } -} - - -println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) -println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) -println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) - - - diff -r e85600529ca5 -r 4794759139ea scala/parser1.scala --- a/scala/parser1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -// A naive bottom-up parser with backtracking -// -// Needs: -// :load matcher.scala - -// some regular expressions -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised - -abstract class Token -case object T_WHITESPACE extends Token -case object T_NUM extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class NT(s: String) extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => 
T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -type Grammar = List[(String, List[Token])] - -// grammar for arithmetic expressions -val grammar = - List ("F" -> List(T_NUM), - "E" -> List(T_NUM), - "E" -> List(NT("E"), T_OP("+"), NT("E")), - "E" -> List(NT("E"), T_OP("-"), NT("E")), - "E" -> List(NT("E"), T_OP("*"), NT("E")), - "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) - - -def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = - ts1 match { - case Nil => None - case t::ts => - if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) - else chop(ts, prefix, t::ts2) - } - -// examples for chop -chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) -chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) - -def replace[A](ts: List[A], out: List[A], in: List [A]) = - chop(ts, out, Nil) match { - case None => None - case Some((before, after)) => Some(before ::: in ::: after) - } - -def parse(g: Grammar, ts: List[Token]) : Boolean = { - println(ts) - if (ts == List(NT("E"))) true - else { - val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) - tss.flatten.exists(parse(g, _)) - } -} - -def parser(g: Grammar, s: String) = { - println("\n") - parse(g, Tok.fromString(s)) -} - - - -parser(grammar, "2 + 3 * 4 + 1") -parser(grammar, "(2 + 3) * (4 + 1)") -parser(grammar, "(2 + 3) * 4 (4 + 1)") - - - diff -r e85600529ca5 -r 4794759139ea scala/parser2.scala --- a/scala/parser2.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -// A naive version of parser combinators producing parse trees -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parse trees -abstract class ParseTree -case class Leaf(t: Token) extends ParseTree -case class Branch(pts: List[ParseTree]) extends ParseTree - -def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match { - case Leaf(t) => Branch(List(Leaf(t), pt2)) - case Branch(pts) => Branch(pts ++ List(pt2)) -} - -// parser combinators -abstract class Parser { - def parse(ts: List[Token]): Set[(ParseTree, List[Token])] - - def parse_all(ts: List[Token]) : Set[ParseTree] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser) : Parser = new AltParser(this, right) - def ~ (right : => Parser) : Parser = new 
SeqParser(this, right) -} - -class AltParser(p: => Parser, q: => Parser) extends Parser { - def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) -} - -class SeqParser(p: => Parser, q: => Parser) extends Parser { - def parse(ts: List[Token]) = - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) -} - -class ListParser(ps: => List[Parser]) extends Parser { - def parse(ts: List[Token]) = ps match { - case Nil => Set() - case p::Nil => p.parse(ts) - case p::ps => - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) - } -} - -case class TokParser(tok: Token) extends Parser { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((Leaf(t), ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object IdParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) - case _ => Set () - } -} - -case object NumParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) - case _ => Set () - } -} - -lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol -lazy val T: Parser = (F ~ T_OP("*") ~ T) || F -lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser - -println(Tok.fromString("1 + 2 + 3")) -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) - -def eval(t: ParseTree) : Int = t match { - case Leaf(T_NUM(n)) => n.toInt - case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2) - case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2) - case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) -} - -(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_)) -(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_)) - -lazy val EXPR: Parser = - new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || - new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || - IdParser - -println(EXPR.parse_all(Tok.fromString("if a then b else c"))) -println(EXPR.parse_all(Tok.fromString("if a then if x then y else c"))) - - - - diff -r e85600529ca5 -r 4794759139ea scala/parser2a.scala --- a/scala/parser2a.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -// Parser combinators including semantic actions -// parses lists of tokens -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => 
T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// parser combinators with return type T -abstract class Parser[T] { - def parse(ts: List[Token]): Set[(T, List[Token])] - - def parse_all(ts: List[Token]) : Set[T] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) - def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) - -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: List[Token]) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse (sb: List[Token]) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - - -case class TokParser(tok: Token) extends Parser[Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T -lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F -lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser - -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) -println(E.parse_all(Tok.fromString("1 + 2 * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) - -// Excercise: implement minus -println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r e85600529ca5 -r 4794759139ea scala/parser3.scala --- a/scala/parser3.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -package object parser { - -// parser combinators -// with input type I and return type T -// -// needs to be compiled with scalac parser3.scala - -abstract class Parser[I <% Seq[_], T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head - - def parse_single(ts: I) : T = parse_all(ts).toList match { - case t::Nil => t - case _ => { println ("Parse Error") ; sys.exit(-1) } - } - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = 
p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - -} diff -r e85600529ca5 -r 4794759139ea scala/parser4.scala --- a/scala/parser4.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ - -// parser combinators with input type I and return type T - -case class SubString(s: String, l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def substring(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubString(s, low, high) - -} - -type Ctxt = List[(String, SubString)] - -abstract class Parser[T] { - - def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] - - def parse_all(s: String) : Set[T] = - for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case class SubStringParser(s: String) extends Parser[SubString] { - val n = s.length - def parse(sb: SubString, ctxt: Ctxt) = { - if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) - Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) - else Set() - } -} - -implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) - -class IgnLst[T](p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -// ambigous grammar -lazy val E: Parser[Int] = - new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || - ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || - "0" ==> { (s) => 0 } || - "1" ==> { (s) => 1 } || - "2" ==> { (s) => 2 } || - "3" ==> { (s) => 3 }) - -println(E.parse_all("1+2*3+3")) - - diff -r e85600529ca5 -r 4794759139ea scala/parser5.scala --- a/scala/parser5.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE 
extends Token -case class T_NUM(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token - -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parser combinators with input type I and return type T -// and memoisation - -case class SubList[T](s: List[T], l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def sublist(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubList(s, low, high) -} - -type Ctxt[T] = List[(String, SubList[T])] - -abstract class Parser[I, T] { - - def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] - - def parse_all(s: List[I]) : Set[T] = - for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case object NumParser extends Parser[Token, Int] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { - case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) - case _ => Set() - } - else Set() - } -} - -case class TokParser(t: Token) extends Parser[Token, Token] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) - else Set() - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -lazy val E: Parser[Token, Int] = - new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || - (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || - NumParser) - -println(E.parse_all(Tk.fromString("1 + 2 * 3"))) -println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) 
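// An extra usage sketch for the parser above (illustrative only): the grammar
// for E is ambiguous and left-recursive; the CHECK and IgnLst wrappers keep the
// left-recursive calls from looping forever by shrinking the input whenever the
// same nonterminal is re-tried on the same sublist. parse_all collects one
// evaluation result per successful complete parse in a set, so different parse
// trees with equal values collapse to a single element.
println(E.parse_all(Tk.fromString("1 + 2 * 3 + 4")))
println(E.parse_all(Tk.fromString("1 + 2 + 3")))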
diff -r e85600529ca5 -r 4794759139ea scala/re-alt.scala --- a/scala/re-alt.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -trait RegExp { - def nullable: Boolean - def derive(c: Char): RegExp -} - -case object Empty extends RegExp { - def nullable = false - def derive(c: Char) = Empty -} - -case object Eps extends RegExp { - def nullable = true - def derive(c: Char) = Empty -} - -case class Str(s: String) extends RegExp { - def nullable = s.isEmpty - def derive(c: Char) = - if (s.isEmpty || s.head != c) Empty - else Str(s.tail) -} - -case class Cat(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = - if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) - else Cat(r.derive(c), s) -} - -case class Star(r: RegExp) extends RegExp { - def nullable = true - def derive(c: Char) = Cat(r.derive(c), this) -} - -case class Or(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable || s.nullable - def derive(c: Char) = Or(r.derive(c), s.derive(c)) -} - -case class And(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = And(r.derive(c), s.derive(c)) -} - -case class Not(r: RegExp) extends RegExp { - def nullable = !r.nullable - def derive(c: Char) = Not(r.derive(c)) -} - - - - -object Matcher { - def matches(r: RegExp, s: String): Boolean = { - if (s.isEmpty) r.nullable - else matches(r.derive(s.head), s.tail) - } -} - - -object Pimps { - implicit def string2RegExp(s: String) = Str(s) - - implicit def regExpOps(r: RegExp) = new { - def | (s: RegExp) = Or(r, s) - def & (s: RegExp) = And(r, s) - def % = Star(r) - def %(n: Int) = rep(r, n) - def ? = Or(Eps, r) - def ! = Not(r) - def ++ (s: RegExp) = Cat(r, s) - def ~ (s: String) = Matcher.matches(r, s) - } - - implicit def stringOps(s: String) = new { - def | (r: RegExp) = Or(s, r) - def | (r: String) = Or(s, r) - def & (r: RegExp) = And(s, r) - def & (r: String) = And(s, r) - def % = Star(s) - def % (n: Int) = rep(Str(s), n) - def ? = Or(Eps, s) - def ! = Not(s) - def ++ (r: RegExp) = Cat(s, r) - def ++ (r: String) = Cat(s, r) - def ~ (t: String) = Matcher.matches(s, t) - } - - def rep(r: RegExp, n: Int): RegExp = - if (n <= 0) Star(r) - else Cat(r, rep(r, n - 1)) -} - - -object Test { - import Pimps._ - - val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" - val int = ("+" | "-").? ++ digit.%(1) - val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? 
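  // An extra illustration of the operator DSL from Pimps (the names hex and
  // hexChecks are ours, not part of the original tests): %(1) means "one or
  // more" via rep, ++ is concatenation and ~ runs Matcher.matches.
  val hex = ("0x" | "0X") ++ (digit | "a" | "b" | "c" | "d" | "e" | "f").%(1)

  // expected: the first two succeed, the last fails (nothing after the prefix)
  def hexChecks() = {
    assert(hex ~ "0x1f")
    assert(hex ~ "0X00")
    assert(!(hex ~ "0x"))
  }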
- - def main(args: Array[String]) { - val ints = List("0", "-4534", "+049", "99") - val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") - val errs = List("", "-", "+", "+-1", "-+2", "2-") - - ints.foreach(s => assert(int ~ s)) - reals.foreach(s => assert(!(int ~ s))) - errs.foreach(s => assert(!(int ~ s))) - - ints.foreach(s => assert(real ~ s)) - reals.foreach(s => assert(real ~ s)) - errs.foreach(s => assert(!(real ~ s))) - } -} diff -r e85600529ca5 -r 4794759139ea scala/re-internal.scala --- a/scala/re-internal.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ - -// measures the time a function needs -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 10001 by 300) { - val re = ("((a?){" + i + "})(a{" + i + "})") - println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) -} - - - diff -r e85600529ca5 -r 4794759139ea scala/re.scala --- a/scala/re.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - s match { - case Nil if (nullable(r)) => Some(Nil, t) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) - } - -def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => one_string(regs, s) match { - case (rest, s) => s.mkString :: tokenize(regs, rest) - } -} - -//examples -println(tokenize(regs, "if true then then 42 else +".toList)) -println(tokenize(regs, "if+true+then+then+42+else +".toList)) -println(tokenize(regs, "ifff if 34 34".toList)) -println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(regs, "/* if true then */ then 42 else +".toList)) -//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea scala/re0.scala --- a/scala/re0.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -import scala.annotation.tailrec - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLCHAR extends Rexp -case class CHAR(c: Char) extends Rexp -case class STR(s: String) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp -case class REP(r: Rexp, n: Int) extends Rexp - -// some convenience for typing in regular expressions -implicit def string2rexp(s : String) : Rexp = STR(s) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def %(n: Int) = REP(r, n) - def %%(n: Int) = SEQ(REP(r, n), STAR(r)) - def ? = ALT(EMPTY, r) - def unary_! 
= NOT(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def %(n: Int) = REP(s, n) - def %%(n: Int) = SEQ(REP(s, n), STAR(s)) - def ? = ALT(EMPTY, s) - def unary_! = NOT(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - - -// nullable function: tests whether the regular -// expression can recognise the empty string - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLCHAR => false - case CHAR(_) => false - case STR(s) => s.isEmpty - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) - case REP(r, i) => if (i == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLCHAR => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) - case REP(r, i) => - if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) -} - -// derivative w.r.t. a string (iterates der) -@tailrec -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -//examples -val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -val int = ("+" | "-").? ~ digit.%%(1) -val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? - -val ints = List("0", "-4534", "+049", "99") -val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") -val errs = List("", "-", "+", "+-1", "-+2", "2-") - -ints.map(s => matcher(int, s)) -reals.map(s => matcher(int, s)) -errs.map(s => matcher(int, s)) - -ints.map(s => matcher(real, s)) -reals.map(s => matcher(real, s)) -errs.map(s => matcher(real, s)) - - - -def RTEST(n: Int) = ("a".? 
%(n)) ~ ("a" %(n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 12000 by 500) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r e85600529ca5 -r 4794759139ea scala/re1.scala --- a/scala/re1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -//example -//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) -//der('b', r) -//der('b', r) - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 29) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r e85600529ca5 -r 4794759139ea scala/re2.scala --- a/scala/re2.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ - -abstract class Rexp { - def simp : Rexp = this -} - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) - case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) - } -} -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r - case (r1, r2) => SEQ(r1, r2) - } -} -case class STAR(r: Rexp) extends Rexp { - 
override def simp = r.simp match { - case NULL => EMPTY - case EMPTY => EMPTY - case r => STAR(r) - } -} - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r).simp) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 100) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r e85600529ca5 -r 4794759139ea scala/re3.scala --- a/scala/re3.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - -abstract class Rexp { - def simp : Rexp = this -} - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) - case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) - } -} -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r - case (r1, r2) => SEQ(r1, r2) - } -} -case class STAR(r: Rexp) extends Rexp { - override def simp = r.simp match { - case NULL => EMPTY - case EMPTY => EMPTY - case r => STAR(r) - } -} -case class NTIMES(r: Rexp, n: Int) extends Rexp { - override def simp = if (n == 0) EMPTY else - r.simp match { - case NULL => NULL - case EMPTY => EMPTY - case r => NTIMES(r, n) - } -} - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => 
false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NTIMES(r, i) => - if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r).simp) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 11001 by 500) { - println(i + " " + + " " + time_needed(1, matcher(RTEST(i), "a" * i))) -} - - diff -r e85600529ca5 -r 4794759139ea scala/re4.scala --- a/scala/re4.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -import scala.annotation.tailrec -abstract class Rexp { - def simp : Rexp = this -} - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) - case (r1, r2) => ALT(r1, r2) - } -} -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r - case (r1, r2) => SEQ(r1, r2) - } -} -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) false else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NTIMES(r, i) => - if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -// derivative w.r.t. 
a string (iterates der) -@tailrec -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r).simp) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -/*def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => NULL - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -}*/ - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 13001 by 500) { - println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i))) -} - - diff -r e85600529ca5 -r 4794759139ea scala/regexp.scala --- a/scala/regexp.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -// regular expressions -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// for example -println(STAR("abc")) - -// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) - - - -// a simple-minded regular expression matcher: -// it loops for examples like STAR(EMPTY) with -// strings this regular expression does not match - -def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { - case (NULL::rs, s) => false - case (EMPTY::rs, s) => smatchers(rs, s) - case (CHAR(c)::rs, Nil) => false - case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) - case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) - case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) - case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) - case (Nil, s) => s == Nil -} - -def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) - -// regular expression: a -println(smatcher(CHAR('a'), "ab")) - -// regular expression: a + (b o c) -println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) - -// regular expression: a + (b o c) -println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) - -// loops for regular expression epsilon* -//println(smatcher(STAR(EMPTY), "a")) - - - -// Regular expression matcher that works properly -//================================================ - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -//examples - -println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) -println(matcher(ALT(STAR("a"), STAR("b")), "")) -println(matcher("abc", "")) -println(matcher(STAR(ALT(EMPTY, "a")), "")) -println(matcher(STAR(EMPTY), "a")) -println(matcher("cab","cab")) -println(matcher(STAR("a"),"aaa")) -println(matcher("cab" ,"cab")) -println(matcher(STAR("a"),"aaa")) - - diff -r e85600529ca5 -r 4794759139ea scala/regexp2.scala --- a/scala/regexp2.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - s match { - case Nil if (nullable(r)) => Some(Nil, t) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) - } - -def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => one_string(regs, s) match { - case (rest, s) => s.mkString :: tokenize(regs, rest) - } -} - -//examples -println(tokenize(regs, "if true then then 42 else +".toList)) -println(tokenize(regs, "if+true+then+then+42+else +".toList)) -println(tokenize(regs, "ifff if 34 34".toList)) -println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(regs, "/* if true then */ then 42 else +".toList)) -//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea scala/regexp3.scala --- a/scala/regexp3.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,141 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL 
=> false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case class T_IDENT(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KEYWORD(s: String) extends Token - - -// an example list of syntactic rules -type Rule = (Rexp, List[Char] => Token) - -val rules: List[Rule]= - List(("if", (s) => T_KEYWORD(s.mkString)), - ("then", (s) => T_KEYWORD(s.mkString)), - ("else", (s) => T_KEYWORD(s.mkString)), - ("+", (s) => T_OP(s.mkString)), - (IDENT, (s) => T_IDENT(s.mkString)), - (NUMBER, (s) => T_NUM(s.mkString.toInt)), - (WHITESPACES, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - - -def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s) - -def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - -def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { - val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match { - case Nil => Nil - case _ => one_token(rs, s) match { - case (rest, token) => token :: tokenize(rs, rest) - } -} - -//examples -println(tokenize(rules, "if true then then 42 
else +".toList)) -println(tokenize(rules, "if+true+then+then+42+else +".toList)) -println(tokenize(rules, "ifff if 34 34".toList)) -println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(rules, "/* if true then */ then 42 else +".toList)) -//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e85600529ca5 -r 4794759139ea scala/regexp4.scala --- a/scala/regexp4.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,168 +0,0 @@ -// regular expressions -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// recognises nothing -def zeroable (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => zeroable(r1) && zeroable(r2) - case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) - case STAR(_) => false - case NOT(r) => !(zeroable(r)) -} - -def starts_with (r: Rexp, c: Char) : Boolean = r match { - case NULL => false - case EMPTY => false - case CHAR(d) => (c == d) - case ALT(r1, r2) => starts_with(r1, c) || starts_with(r2, c) - case SEQ(r1, r2) => if (nullable(r1)) (starts_with(r1, c) || starts_with(r2, c)) - else starts_with(r1, c) - case STAR(r) => starts_with(r, c) - case NOT(r) => !(starts_with(r, c)) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGITS = RANGE("0123456789".toList) -val NONZERODIGITS = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) -val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") -val WHITESPACE = RANGE(" \n".toList) -val SYMBOLS = RANGE("/*".toList) - -val ALL = ALT(ALT(ALT(LETTER, DIGITS), WHITESPACE), SYMBOLS) - -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - -println(matcher(NUMBER, "0")) -println(matcher(NUMBER, "01")) -println(matcher(NUMBER, "123450")) - -println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) -println(matcher(ALT(STAR("a"), STAR("b")), "")) -println(matcher("abc", "")) -println(matcher(STAR(ALT(EMPTY, "a")), "")) -println(matcher(STAR(EMPTY), "a")) -println(matcher("cab","cab")) -println(matcher(STAR("a"),"aaa")) -println(matcher("cab" ,"cab")) -println(matcher(STAR("a"),"aaa")) - -println(matcher(COMMENT, "/* */")) -println(matcher(COMMENT, "/* foobar comment */")) -println(matcher(COMMENT, "/* test */ test */")) - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, COMMENT, WHITESPACE) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - if (zeroable(r)) None else s match { - case Nil => if (nullable(r)) Some(Nil, t) else None - case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, t) - //case c::s if (zeroable(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) -} - - -def lex_one (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def lex_all (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => lex_one(regs, s) match { - case (rest, s) => s.mkString :: lex_all(regs, rest) - } -} - - - -starts_with(der('/', COMMENT), '*') - -munch(COMMENT, "/*ifff if 34 */".toList, Nil) -val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL))) - -der('a', COMMENT2) -zeroable(der('a', COMMENT2)) - -matcher(COMMENT2, "ifff if 34") -munch(COMMENT2, "ifff if 34".toList, Nil) -starts_with(COMMENT2, 'i') -lex_all(regs, "ifff if 34".toList) -lex_all(regs, "ifff $ if 34".toList) - diff -r e85600529ca5 -r 4794759139ea scala/regexp5.scala --- a/scala/regexp5.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -// regular expressions -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// 
some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// recognises nothing -def zeroable (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => zeroable(r1) && zeroable(r2) - case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) - //zeroable(r1) || zeroable(r2) - case STAR(_) => false - case NOT(r) => !(zeroable(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGITS = RANGE("0123456789".toList) -val NONZERODIGITS = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) -val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") -val WHITESPACE = RANGE(" \n".toList) - -val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE) - -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - -println(matcher(NUMBER, "0")) -println(matcher(NUMBER, "01")) -println(matcher(NUMBER, "123450")) - -println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) -println(matcher(ALT(STAR("a"), STAR("b")), "")) -println(matcher("abc", "")) -println(matcher(STAR(ALT(EMPTY, "a")), "")) -println(matcher(STAR(EMPTY), "a")) -println(matcher("cab","cab")) -println(matcher(STAR("a"),"aaa")) -println(matcher("cab" ,"cab")) -println(matcher(STAR("a"),"aaa")) - -println(matcher(COMMENT, "/* */")) -println(matcher(COMMENT, "/* 34 */")) -println(matcher(COMMENT, "/* foobar comment */")) -println(matcher(COMMENT, "/* test */ test */")) - -// an example list of regular expressions - -abstract class Token - -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case class T_IDENT(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KEYWORD(s: String) extends Token - -val regs: List[Rexp]= List("if", "then", "else", "+", 
IDENT, NUMBER, WHITESPACE) - -type Rule = (Rexp, List[Char] => Token) - -val rules: List[Rule]= - List(("if", (s) => T_KEYWORD(s.mkString)), - ("then", (s) => T_KEYWORD(s.mkString)), - ("else", (s) => T_KEYWORD(s.mkString)), - ("+", (s) => T_OP(s.mkString)), - (IDENT, (s) => T_IDENT(s.mkString)), - (NUMBER, (s) => T_NUM(s.mkString.toInt)), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = -{ println("string " + s) - println(" rexp " + r) - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => { println("1"); None } - case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (zeroable(der (c, r))) => { println("2"); None } - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } -} - -def lex_one (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { - val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def lex_all (rs: List[Rule], s: List[Char]) : List[Token] = s match { - case Nil => Nil - case _ => lex_one(rs, s) match { - case (rest, t) => t :: lex_all(rs, rest) - } -} - - - -println(matcher(COMMENT, "/*ifff if 34 34*/")) -rules.map { (r) => munch(r._1, r._2, "/*ifff if 34 34*/ ".toList, Nil) } -println(lex_all(rules, "ifff if 34 34".toList)) -println(lex_all(rules, " /*ifff if 34 34*/ ".toList)) -println(lex_all(rules, "ifff $ if 34".toList)) - - diff -r e85600529ca5 -r 4794759139ea scala/scraper.scala --- a/scala/scraper.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -import java.io.OutputStreamWriter -import java.net.URL -import scala.io.Source.fromInputStream - -val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp") - -//connect to url -val conn = url.openConnection -conn.setRequestProperty("User-Agent", "") -conn.setDoOutput(true) -conn.connect - -//sending data -val wr = new OutputStreamWriter(conn.getOutputStream()) -//wr.write("Fdate=2012-9-24&Tdate=2012-09-25") -//wr.write("Fdate=2012-9-18&Tdate=2012-09-25") -wr.write("Fdate=2001-5-18&Tdate=2012-09-25") -wr.flush -wr.close - -//receiving data -val page = fromInputStream(conn.getInputStream).getLines.mkString("\n") - -//println(page) - -// regular expression . 
excludes newlines, -// therefore we have to use [\S\s] -val regex1 = """[\S\s]*?""".r -val rows = regex1.findAllIn(page).toList - -//print(rows) - -val regex2 = """([\S\s]*?)""".r - -def aux(s: String) : Array[String] = { - for (m <- regex2.findAllIn(s).toArray) yield m match { - case regex2(value) => value.trim - } -} - -val data = rows.map { aux } - -def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt - -//day with highest particle pollution (PM_10) -data.sortWith(compare(1)).last - -//day with highest sulfur dioxide (SO_2) -data.sortWith(compare(2)).last - -//day with highest nitro dioxide (NO_2) -data.sortWith(compare(3)).last - -//days with highest PM_10 -val groups = data.groupBy(_(1).toInt) -val max_key = groups.keySet.max -groups(max_key) diff -r e85600529ca5 -r 4794759139ea scala/while.scala --- a/scala/while.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,231 +0,0 @@ -// A parser and evaluator for teh while language -// -import matcher._ -import parser._ - - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: 
List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || - (T_KWD("true") ==> ((_) => True)) || - (T_KWD("false") ==> ((_) => False: BExp)) - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - - -// examples -val p1 = "x := 5" -val p1_toks = Tok.fromString(p1) -val p1_ast = Block.parse_all(p1_toks) -println(p1_toks) -println(p1_ast) - -val p1a = "{ x := 5; y := 8}" -val p1a_toks = Tok.fromString(p1a) -val p1a_ast = Block.parse_all(p1a_toks) -println(p1a_ast) - -val p2 = "5 = 6" -val p2_toks = Tok.fromString(p2) -val p2_ast = BExp.parse_all(p2_toks) -println(p2_ast) - -val p2a = "true" -val p2a_toks = Tok.fromString(p2a) -val p2a_ast = BExp.parse_all(p2a_toks) -println(p2a_ast) - -val p3 = "if true then skip else skip" -val p3_toks = Tok.fromString(p3) -val p3_ast = Stmt.parse_all(p3_toks) -println(p3_ast) - -val p3a = "if true then x := 5 else x := 10" -val p3a_toks = Tok.fromString(p3a) -val p3a_ast = Stmt.parse_all(p3a_toks) -println(p3a_ast) - -val p3b = "if false then x := 5 else x := 10" -val p3b_toks = Tok.fromString(p3b) -val p3b_ast = Stmt.parse_all(p3b_toks) -println(p3b_ast) - -// multiplication -val p4 = """{ x := 5; - y := 4; - r := 0; - while y > 0 do { - r := r + x; - y := y - 1 - } - }""" -val p4_toks = Tok.fromString(p4) -val p4_ast = Block.parse_all(p4_toks) -println(p4_ast) - -val p5 = """ - n := 9; - minus1 := 0; - minus2 := 1; - temp := 0; - while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 - }; - fib_res := minus2 -""" -val p5_toks = Tok.fromString(p5) -val p5_ast = Stmts.parse_all(p5_toks) - -// interpreter -type Env = Map[String, Int] - -def eval_bexp(b: BExp, env: Env) : Boolean = b match { - case True => true - case False => false - case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) - case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) - case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) - case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) -} - -def eval_aexp(a: AExp, env : Env) : Int 
= a match { - case Num(i) => i - case Var(s) => env(s) - case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) - case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) - case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) -} - -def eval_stmt(s: Stmt, env: Env) : Env = s match { - case Skip => env - case Assign(x, a) => env + (x -> eval_aexp(a, env)) - case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) - case While(b, bl) => - if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) - else env -} - -def eval_bl(bl: Block, env: Env) : Env = bl match { - case Nil => env - case s::bl => eval_bl(bl, eval_stmt(s, env)) -} - -//examples -println(eval_stmt(p3a_ast.head, Map.empty)) -println(eval_stmt(p3b_ast.head, Map.empty)) -println(eval_bl(p4_ast.head, Map.empty)) -println(eval_bl(p5_ast.head, Map.empty)) diff -r e85600529ca5 -r 4794759139ea scala/while1.scala --- a/scala/while1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,220 +0,0 @@ -// A parser and evaluator for the WHILE language -// -import matcher._ -import parser._ - - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - 
} -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// interpreter -type Env = Map[String, Int] - -def eval_bexp(b: BExp, env: Env) : Boolean = b match { - case True => true - case False => false - case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) - case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) - case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) -} - -def eval_aexp(a: AExp, env : Env) : Int = a match { - case Num(i) => i - case Var(s) => env(s) - case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) - case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) - case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) -} - -def eval_stmt(s: Stmt, env: Env) : Env = s match { - case Skip => env - case Assign(x, a) => env + (x -> eval_aexp(a, env)) - case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) - case While(b, bl) => - if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) - else env - case Write(x) => { println(env(x)); env } -} - -def eval_bl(bl: Block, env: Env) : Env = bl match { - case Nil => env - case s::bl => eval_bl(bl, eval_stmt(s, env)) -} - -def eval_prog(name: String) : Env = { - val tks = Tok.fromFile(name) - val ast = Stmts.parse_single(tks) - eval_bl(ast, Map.empty) -} - - -//examples - -//eval_prog("loops.while") -eval_prog("fib.while") - - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j 
<- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -""" - - - -def eval_test(n: Int) : Unit = { - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - println(n + " " + time_needed(2, eval_bl(ast, Map.empty))) -} - -List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_)) - - - - - - - diff -r e85600529ca5 -r 4794759139ea slides/beamerthemeplainculight.sty --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/beamerthemeplainculight.sty Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,101 @@ +%%\Providespackage{beamerthemeplainculight}[2003/11/07 ver 0.93] +\NeedsTeXFormat{LaTeX2e}[1995/12/01] + +% Copyright 2003 by Till Tantau . +% +% This program can be redistributed and/or modified under the terms +% of the LaTeX Project Public License Distributed from CTAN +% archives in directory macros/latex/base/lppl.txt. + +\newcommand{\slidecaption}{} + +\mode + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% comic fonts fonts +\DeclareFontFamily{T1}{comic}{}% +\DeclareFontShape{T1}{comic}{m}{n}{<->s*[.9]comic8t}{}% +\DeclareFontShape{T1}{comic}{m}{it}{<->s*[.9]comic8t}{}% +\DeclareFontShape{T1}{comic}{m}{sc}{<->s*[.9]comic8t}{}% +\DeclareFontShape{T1}{comic}{b}{n}{<->s*[.9]comicbd8t}{}% +\DeclareFontShape{T1}{comic}{b}{it}{<->s*[.9]comicbd8t}{}% +\DeclareFontShape{T1}{comic}{m}{sl}{<->ssub * comic/m/it}{}% +\DeclareFontShape{T1}{comic}{b}{sc}{<->sub * comic/m/sc}{}% +\DeclareFontShape{T1}{comic}{b}{sl}{<->ssub * comic/b/it}{}% +\DeclareFontShape{T1}{comic}{bx}{n}{<->ssub * comic/b/n}{}% +\DeclareFontShape{T1}{comic}{bx}{it}{<->ssub * comic/b/it}{}% +\DeclareFontShape{T1}{comic}{bx}{sc}{<->sub * comic/m/sc}{}% +\DeclareFontShape{T1}{comic}{bx}{sl}{<->ssub * comic/b/it}{}% +% +\renewcommand{\rmdefault}{comic}% +\renewcommand{\sfdefault}{comic}% +\renewcommand{\mathfamilydefault}{cmr}% mathfont should be still the old one +% +\DeclareMathVersion{bold}% mathfont needs to be bold +\DeclareSymbolFont{operators}{OT1}{cmr}{b}{n}% +\SetSymbolFont{operators}{bold}{OT1}{cmr}{b}{n}% +\DeclareSymbolFont{letters}{OML}{cmm}{b}{it}% +\SetSymbolFont{letters}{bold}{OML}{cmm}{b}{it}% +\DeclareSymbolFont{symbols}{OMS}{cmsy}{b}{n}% +\SetSymbolFont{symbols}{bold}{OMS}{cmsy}{b}{n}% +\DeclareSymbolFont{largesymbols}{OMX}{cmex}{b}{n}% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Frametitles +\setbeamerfont{frametitle}{size={\LARGE}} +\setbeamerfont{frametitle}{family={\usefont{T1}{ptm}{b}{n}}} +\setbeamercolor{frametitle}{fg=ProcessBlue,bg=white} + +\setbeamertemplate{frametitle}{% +\vskip 2mm % distance from the top margin +\hskip -3mm % distance from left margin +\vbox{% +\begin{minipage}{1.05\textwidth}% +\centering% +\insertframetitle% +\end{minipage}}% +} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Foot +% +\setbeamertemplate{navigation symbols}{} +\usefoottemplate{% +\vbox{% + \tinyline{% + \tiny\hfill\textcolor{gray!50}{\slidecaption{} -- + p.~\insertframenumber/\inserttotalframenumber}}}% +} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\beamertemplateballitem 
+\setlength\leftmargini{2mm} +\setlength\leftmarginii{0.6cm} +\setlength\leftmarginiii{1.5cm} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% blocks +%\definecolor{cream}{rgb}{1,1,.65} +\definecolor{cream}{rgb}{1,1,.8} +\setbeamerfont{block title}{size=\normalsize} +\setbeamercolor{block title}{fg=black,bg=cream} +\setbeamercolor{block body}{fg=black,bg=cream} + +\setbeamertemplate{blocks}[rounded][shadow=true] + +\setbeamercolor{boxcolor}{fg=black,bg=cream} + +\mode + + + + + + + diff -r e85600529ca5 -r 4794759139ea slides/compiled.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/compiled.data Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,13 @@ +%% LaTeX2e file `compiled.data' +%% generated by the `filecontents' environment +%% from source `slides09' on 2012/11/28. +%% +%1 0.234146 +%5000 0.227539 +%10000 0.280748 +50000 1.087897 +100000 3.713165 +250000 21.6624545 +500000 85.872613 +750000 203.6408015 +1000000 345.736574 diff -r e85600529ca5 -r 4794759139ea slides/compiled2.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/compiled2.data Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,11 @@ +%% LaTeX2e file `compiled2.data' +%% generated by the `filecontents' environment +%% from source `slides09' on 2012/11/28. +%% +200 0.222058 +400 0.215204 +600 0.202031 +800 0.21986 +1000 0.205934 +1200 0.1981615 +1400 0.207116 diff -r e85600529ca5 -r 4794759139ea slides/getting-info-from-the-internet.jpg Binary file slides/getting-info-from-the-internet.jpg has changed diff -r e85600529ca5 -r 4794759139ea slides/interpreted.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/interpreted.data Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,13 @@ +%% LaTeX2e file `interpreted.data' +%% generated by the `filecontents' environment +%% from source `slides09' on 2012/11/28. +%% +%1 0.00503 +200 1.005863 +400 7.8296765 +500 15.43106 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 diff -r e85600529ca5 -r 4794759139ea slides/interpreted2.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/interpreted2.data Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,12 @@ +%% LaTeX2e file `interpreted2.data' +%% generated by the `filecontents' environment +%% from source `slides09' on 2012/11/28. 
+%% +%1 0.00503 +200 1.005863 +400 7.8296765 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 diff -r e85600529ca5 -r 4794759139ea slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides01.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,483 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 01, King's College London, 26.~September 2012} + + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (1)\\[-3mm] + \end{tabular}} + + \begin{center} + \includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} + \includegraphics[scale=0.31]{pics/ante2.jpg}\\ + \footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} + \end{center} + +\normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{textblock}{1}(2,5) +\begin{tabular}{c} +\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm] +\small Server +\end{tabular} +\end{textblock} + +\begin{textblock}{1}(5.6,4) + \begin{tikzpicture}[scale=1.1] + \draw[white] 
(0,1) node (X) {}; + \draw[white] (2,1) node (Y) {}; + \draw[white] (0,0) node (X1) {}; + \draw[white] (2,0) node (Y1) {}; + \draw[white] (0,-1) node (X2) {}; + \draw[white] (2,-1) node (Y2) {}; + \draw[red, <-, line width = 2mm] (X) -- (Y); + \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {}; + \draw[red, ->, line width = 2mm] (X1) -- (Y1); + \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {}; + \draw[red, <-, line width = 2mm] (X2) -- (Y2); + \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {}; + \end{tikzpicture} +\end{textblock} + + +\begin{textblock}{1}(9,5.5) +\begin{tabular}{c} +\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm] +\small Browser +\end{tabular} +\end{textblock} + +\only<2>{ +\begin{textblock}{10}(2,13.5) +\begin{itemize} +\item programming languages, compilers +\end{itemize} +\end{textblock}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +transforming strings into structured data\\[10mm] + +{\LARGE\bf Lexing}\medskip\\ +\hspace{5mm}(recognising ``words'')\\[6mm] + +{\LARGE\bf Parsing}\medskip\\ +\hspace{5mm}(recognising ``sentences'') + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +The subject is quite old: + +\begin{itemize} +\item Turing Machines, 1936 +\item first compiler for COBOL, 1957 (Grace Hopper) +\item but surprisingly research papers are still published now +\end{itemize} + +\begin{flushright} +\includegraphics[scale=0.3]{pics/hopper.jpg}\\ +\footnotesize\textcolor{gray}{Grace Hopper} +\end{flushright} + +{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}This Course\end{tabular}} + +\begin{itemize} +\item the ultimate goal is to implement a small web-browser (really small one)\bigskip +\end{itemize} + +Let's start with: + +\begin{itemize} +\item a web-crawler +\item an email harvester +\item a web-scraper +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Web-Crawler\end{tabular}} + +\mbox{}\\[10mm] + +\begin{enumerate} +\item given an URL, read the corresponding webpage +\item extract all links from it +\item call the web-crawler again for all these links +\end{enumerate} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Web-Crawler\end{tabular}} + +\mbox{}\\[10mm] + + +\begin{enumerate} +\item given an URL, read the corresponding webpage +\item if not possible print, out a problem +\item if possible, extract all links from it +\item call the web-crawler again for all these links +\end{enumerate}\bigskip\pause + +\small (we need a bound for the number of recursive calls) + +\small (the purpose is to check all links on my own webpage) +\end{frame}} 
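+% A minimal Scala sketch of the crawler just described (the actual lecture code
+% is in app2.scala/app3.scala and may differ); the URL pattern and the helper
+% names below are only illustrative assumptions:
+%
+%   import scala.io.Source
+%   import scala.util.Try
+%
+%   // simplified pattern for http/https URLs
+%   val http_pattern = """https?://[^"\s>]+""".r
+%
+%   // read a webpage; if not possible, print out a problem and return ""
+%   def get_page(url: String): String =
+%     Try(Source.fromURL(url, "ISO-8859-1").mkString).getOrElse {
+%       println("  problem with: " + url); ""
+%     }
+%
+%   // extract all links from a page
+%   def get_all_URLs(page: String): Set[String] =
+%     http_pattern.findAllIn(page).toSet
+%
+%   // the bound n limits the number of recursive calls
+%   def crawl(url: String, n: Int): Unit =
+%     if (n > 0) {
+%       println("Visiting: " + url)
+%       for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+%     }
+%
+%   // e.g. crawl("""http://www.inf.kcl.ac.uk/staff/urbanc/""", 2)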
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Scala\end{tabular}} + +\footnotesize a simple Scala function for reading webpages\\[-3mm] + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app0.scala}}}\pause +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}}}\pause\bigskip + + +\footnotesize slightly more complicated for handling errors properly:\\[-3mm] + +\footnotesize +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app1.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Regular Expression\end{tabular}} + +\begin{itemize} +\item \ldots{} is a pattern or template for specifying strings +\end{itemize}\bigskip + +\begin{center} +\only<1>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{"https?://[$\hat{\hspace{2mm}}$"]*"}}}% +\only<2>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{"""\textbackslash{}"https?://[$\hat{\hspace{2mm}}$\textbackslash{}"]*\textbackslash{}"""".r}}} +\end{center}\bigskip\bigskip + +matches for example\\ +\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf +\texttt{"http://www.foobar.com"}}\\ +\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf +\texttt{"https://www.tls.org"}}\\ + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{rexp.findAllIn(string)}}\medskip + +returns a list of all (sub)strings that match the regular expression\bigskip\bigskip + +{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{rexp.findFirstIn(string)}}\medskip + +returns either {\bf\texttt{None}} if no (sub)string matches +or {\bf\texttt{Some(s)}} with the first (sub)string + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app2.scala}}}\medskip + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{crawl(some\_start\_URL, 2)}}\ + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\footnotesize +a version that only ``crawls'' links in my domain: + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app3.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\footnotesize +a little email ``harvester'': + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app4.scala}}}\bigskip + +\tiny +\textcolor{gray}{\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\bl}[1]{\textcolor{blue}{#1}} 
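+% A small usage sketch for findAllIn/findFirstIn from the slides above; the
+% pattern is a simplified stand-in, not the slide's exact regular expression:
+%
+%   val urls = """https?://[^"\s]+""".r
+%   val page = """<a href="http://www.foobar.com">foo</a>"""
+%
+%   urls.findAllIn(page).toList        // expected: List(http://www.foobar.com)
+%   urls.findFirstIn(page)             // expected: Some(http://www.foobar.com)
+%   urls.findFirstIn("no links here")  // expected: None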
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +Their inductive definition:\medskip + +\begin{textblock}{6}(2,5) + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ + & \bl{$\mid$} & \bl{c} & character\\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \end{tabular} + \end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +\small +In Scala: + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app51.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] Regular Expression\end{tabular}} + +\begin{textblock}{15}(1,4) + \begin{tabular}{@ {}rcl} + \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ + \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ + \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ + \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ + \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ + $L$(r$_2$) $\}$}\\ + \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\ + \end{tabular}\bigskip + +\onslide<2->{ +\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ +\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ +\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ + $L$(r)$^n$ $\}$}} +} + \end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} + +\large +a regular expression \bl{r} matches a string \bl{s} is defined as + +\begin{center} +\bl{s $\in$ $L$(r)}\\ +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}This Course\end{tabular}} + +We will have a look at: + +\begin{itemize} +\item regular expressions / regular expression matching +\item automata +\item the Myhill-Nerode theorem +\item parsing +\item grammars +\item a small interpreter / web browser +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Exam\end{tabular}} + +\begin{itemize} +\item The question ``Is this relevant for the exam?'' is not appreciated!\bigskip\\ + +Whatever is in the homework sheets (and is not marked ``optional'') is relevant for the +exam.\\ No code needs to be written in the exam. 
+\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides02.pdf Binary file slides/slides02.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides02.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,494 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 02, King's College London, 3.~October 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (2)\\[3mm] + \end{tabular}} + + %\begin{center} + %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} + %\includegraphics[scale=0.31]{pics/ante2.jpg}\\ + %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} + %\end{center} + +\normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] 
+\frametitle{\begin{tabular}{c}Languages\end{tabular}} + +A \alert{language} is a set of strings.\bigskip + +A \alert{regular expression} specifies a set of strings or language. + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +Their inductive definition: + + +\begin{textblock}{6}(2,5) + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ + & \bl{$\mid$} & \bl{c} & character\\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \end{tabular} + \end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +Their implementation in Scala: + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app51.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] Regular Expression\end{tabular}} + +\begin{textblock}{15}(1,4) + \begin{tabular}{@ {}rcl} + \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ + \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ + \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ + \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ + \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) @ $L$(r$_2$)}\\ + \bl{$L$(r$^*$)} & \bl{$\dn$} & \bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}\\ + \end{tabular}\bigskip + +\hspace{5mm}\textcolor{gray}{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ +\textcolor{gray}{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$} +\end{textblock} + +\only<2->{ +\begin{textblock}{5}(11,5) +\textcolor{gray}{\small +A @ B\\ +\ldots you take out every string from A and +concatenate it with every string in B +} +\end{textblock}} + +\only<3->{ +\begin{textblock}{6}(9,12)\small +\bl{$L$} is a function from regular expressions to sets of strings\\ +\bl{$L$ : Rexp $\Rightarrow$ Set[String]} +\end{textblock}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\large +\begin{center} +What is \bl{$L$(a$^*$)}? 
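+% Unfolding the definitions from the previous slide:
+%   L(a*) = \bigcup_{n >= 0} L(a)^n = {""} \cup {"a"} \cup {"aa"} \cup ...
+% i.e. the set of all strings consisting only of a's, including the empty string.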
+\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +\newcommand{\YES}{\textcolor{gray}{yes}} +\newcommand{\NO}{\textcolor{gray}{no}} +\newcommand{\FORALLR}{\textcolor{gray}{$\forall$ r.}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Reg Exp Equivalences\end{tabular}} + +\begin{center} +\begin{tabular}{l@ {\hspace{7mm}}rcl@ {\hspace{7mm}}l} +&\bl{(a + b) + c} & \bl{$\equiv^?$} & \bl{a + (b + c)} & \onslide<2->{\YES}\\ +&\bl{a + a} & \bl{$\equiv^?$} & \bl{a} & \onslide<3->{\YES}\\ +&\bl{(a $\cdot$ b) $\cdot$ c} & \bl{$\equiv^?$} & \bl{a $\cdot$ (b $\cdot$ c)} & \onslide<4->{\YES}\\ +&\bl{a $\cdot$ a} & \bl{$\equiv^?$} & \bl{a} & \onslide<5->{\NO}\\ +&\bl{$\epsilon^*$} & \bl{$\equiv^?$} & \bl{$\epsilon$} & \onslide<6->{\YES}\\ +&\bl{$\varnothing^*$} & \bl{$\equiv^?$} & \bl{$\varnothing$} & \onslide<7->{\NO}\\ +\FORALLR &\bl{r $\cdot$ $\epsilon$} & \bl{$\equiv^?$} & \bl{r} & \onslide<8->{\YES}\\ +\FORALLR &\bl{r + $\epsilon$} & \bl{$\equiv^?$} & \bl{r} & \onslide<9->{\NO}\\ +\FORALLR &\bl{r + $\varnothing$} & \bl{$\equiv^?$} & \bl{r} & \onslide<10->{\YES}\\ +\FORALLR &\bl{r $\cdot$ $\varnothing$} & \bl{$\equiv^?$} & \bl{r} & \onslide<11->{\NO}\\ +&\bl{c $\cdot$ (a + b)} & \bl{$\equiv^?$} & \bl{(c $\cdot$ a) + (c $\cdot$ b)} & \onslide<12->{\YES}\\ +&\bl{a$^*$} & \bl{$\equiv^?$} & \bl{$\epsilon$ + (a $\cdot$ a$^*$)} & \onslide<13->{\YES} +\end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} + +\large +a regular expression \bl{r} matches a string \bl{s} is defined as + +\begin{center} +\bl{s $\in$ $L$(r)}\\ +\end{center}\bigskip\bigskip\pause + +\small +if \bl{r$_1$ $\equiv$ r$_2$}, then \bl{$s$ $\in$ $L$(r$_1$)} iff \bl{$s$ $\in$ $L$(r$_2$)} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Matching Algorithm\end{tabular}} + +\begin{itemize} +\item given a regular expression \bl{r} and a string \bl{s}, say yes or no for whether +\begin{center} +\bl{s $\in$ $L$(r)} +\end{center} +or not.\bigskip\bigskip\pause +\end{itemize}\pause + +\small +\begin{itemize} +\item Identifiers (strings of letters or digits, starting with a letter) +\item Integers (a non-empty sequence of digits) +\item Keywords (else, if, while, \ldots) +\item White space (a non-empty sequence of blanks, newlines and tabs) +\end{itemize} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}A Matching Algorithm\end{tabular}} + +\small +whether a regular expression matches the empty string:\medskip + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app5.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} + +\large +If \bl{r} matches the string \bl{c::s}, what is a regular expression that matches 
\bl{s}?\bigskip\bigskip\bigskip\bigskip + +\small +\bl{der c r} gives the answer +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative of a Rexp (2)\end{tabular}} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\ + \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ + \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ + & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ + & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ + \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)} &\smallskip\\\pause + + \bl{ders [] r} & \bl{$\dn$} & \bl{r} & \\ + \bl{ders (c::s) r} & \bl{$\dn$} & \bl{ders s (der c r)} & \\ + \end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app6.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Rexp Matcher\end{tabular}} + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app7.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}} + +Remember their inductive definition:\\[5cm] + +\begin{textblock}{6}(5,5) + \begin{tabular}{@ {}rrl} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$}\\ + & \bl{$\mid$} & \bl{$\epsilon$} \\ + & \bl{$\mid$} & \bl{c} \\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$}\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} \\ + & \bl{$\mid$} & \bl{r$^*$} \\ + \end{tabular} + \end{textblock} + +If we want to prove something, say a property \bl{$P$(r)}, for all regular expressions \bl{r} then \ldots + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Rexp (2)\end{tabular}} + +\begin{itemize} +\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip +\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip +\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}. +\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already +holds for \bl{r}. 
+\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Rexp (3)\end{tabular}} + +Assume \bl{$P(r)$} is the property: + +\begin{center} +\bl{nullable(r)} if and only if \bl{"" $\in$ $L$(r)} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Strings\end{tabular}} + +If we want to prove something, say a property \bl{$P$(s)}, for all strings \bl{s} then \ldots\bigskip + +\begin{itemize} +\item \bl{$P$} holds for the empty string, and\medskip +\item \bl{$P$} holds for the string \bl{c::s} under the assumption that \bl{$P$} +already holds for \bl{s} +\end{itemize} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Strings (2)\end{tabular}} + +Let \bl{Der c A} be the set defined as + +\begin{center} +\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ } +\end{center} + +Assume that \bl{$L$(der c r) = Der c ($L$(r))}. Prove that + +\begin{center} +\bl{matcher(r, s) if and only if s $\in$ $L$(r)} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Regular Languages\end{tabular}} + +A language (set of strings) is \alert{regular} iff there exists +a regular expression that recognises all its strings. 
+ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Automata\end{tabular}} + +A deterministic finite automaton consists of: + +\begin{itemize} +\item a set of states +\item one of these states is the start state +\item some states are accepting states, and +\item there is transition function\medskip + +\small +which takes a state as argument and a character and produces a new state\smallskip\\ +this function might not always be defined +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides03.pdf Binary file slides/slides03.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides03.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,386 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 03, King's College London, 10.~October 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (3)\\[3mm] + \end{tabular}} + + %\begin{center} + 
%\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} + %\includegraphics[scale=0.31]{pics/ante2.jpg}\\ + %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} + %\end{center} + +\normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + & \alert{\bf (I have put a temporary link in there.)}\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Last Week\end{tabular}} + +Last week I showed you + +\begin{itemize} +\item one simple-minded regular expression matcher (which however does not work in all cases), and\bigskip +\item one which works provably in all cases + +\begin{center} +\bl{matcher r s} \;\;if and only if \;\; \bl{s $\in$ $L$(r)} +\end{center} +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\ + \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ + \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ + & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ + & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ + \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)}\\ + \end{tabular} +\end{center} + +``the regular expression after \bl{c} has been recognised'' + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +For this we defined the set \bl{Der c A} as + +\begin{center} +\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ } +\end{center} + +which is called the semantic derivative of a set +and proved + +\begin{center} +\bl{$L$(der c r) $=$ Der c ($L$(r))} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Idea of the Algorithm\end{tabular}} + +If we want to recognise the string \bl{abc} with regular expression \bl{r} +then\medskip + +\begin{enumerate} +\item \bl{Der a ($L$(r))}\pause +\item \bl{Der b (Der a ($L$(r)))} +\item \bl{Der c (Der b (Der a ($L$(r))))}\pause +\item finally we test whether the empty string is in set\pause\medskip +\end{enumerate} + +The matching algorithm works similarly, just over regular expression than sets. 
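+% A Scala sketch of this algorithm, transcribing the der/ders equations from
+% the slides (the actual lecture code is in app6.scala/app7.scala); the
+% constructor names NULL, EMPTY, CHAR, ALT, SEQ, STAR and the helper nullable
+% are assumed to match app51.scala/app5.scala:
+%
+%   def der(c: Char, r: Rexp): Rexp = r match {
+%     case NULL => NULL
+%     case EMPTY => NULL
+%     case CHAR(d) => if (c == d) EMPTY else NULL
+%     case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+%     case SEQ(r1, r2) =>
+%       if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+%       else SEQ(der(c, r1), r2)
+%     case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+%   }
+%
+%   def ders(s: List[Char], r: Rexp): Rexp = s match {
+%     case Nil => r
+%     case c :: cs => ders(cs, der(c, r))
+%   }
+%
+%   // finally check whether the remaining regular expression is nullable
+%   def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r))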
+\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Input: string \bl{abc} and regular expression \bl{r} + +\begin{enumerate} +\item \bl{der a r} +\item \bl{der b (der a r)} +\item \bl{der c (der b (der a r))}\pause +\item finally check whether the latter regular expression can match the empty string +\end{enumerate} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +We need to prove + +\begin{center} +\bl{$L$(der c r) $=$ Der c ($L$(r))} +\end{center} + +by induction on the regular expression. + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}} + +\begin{itemize} +\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip +\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip +\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}. +\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already +holds for \bl{r}. +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs about Natural Numbers\\ and Strings\end{tabular}} + +\begin{itemize} +\item \bl{$P$} holds for \bl{$0$} and +\item \bl{$P$} holds for \bl{$n + 1$} under the assumption that \bl{$P$} already +holds for \bl{$n$} +\end{itemize}\bigskip + +\begin{itemize} +\item \bl{$P$} holds for \bl{\texttt{""}} and +\item \bl{$P$} holds for \bl{$c\!::\!s$} under the assumption that \bl{$P$} already +holds for \bl{$s$} +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +\begin{center} + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ + & \bl{$\mid$} & \bl{c} & character\\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \end{tabular}\bigskip\pause + \end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Languages\end{tabular}} + +A \alert{language} is a set of strings.\bigskip + +A \alert{regular expression} specifies a set of strings or language.\bigskip + +A language is \alert{regular} iff there exists +a regular expression that recognises all its strings.\bigskip\bigskip\pause + +\textcolor{gray}{not all languages are regular, e.g.~\bl{a$^n$b$^n$}.} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] 
+\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +\begin{center} + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ + & \bl{$\mid$} & \bl{c} & character\\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \end{tabular}\bigskip + \end{center} + +How about ranges \bl{[a-z]}, \bl{r$^\text{+}$} and \bl{!r}? + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Negation of Regular Expr's\end{tabular}} + +\begin{itemize} +\item \bl{!r} \hspace{6mm} (everything that \bl{r} cannot recognise)\medskip +\item \bl{$L$(!r) $\dn$ UNIV - $L$(r)}\medskip +\item \bl{nullable (!r) $\dn$ not (nullable(r))}\medskip +\item \bl{der\,c\,(!r) $\dn$ !(der\,c\,r)} +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Regular Exp's for Lexing\end{tabular}} + +Lexing separates strings into ``words'' / components. + +\begin{itemize} +\item Identifiers (non-empty strings of letters or digits, starting with a letter) +\item Numbers (non-empty sequences of digits omitting leading zeros) +\item Keywords (else, if, while, \ldots) +\item White space (a non-empty sequence of blanks, newlines and tabs) +\item Comments +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Automata\end{tabular}} + +A deterministic finite automaton consists of: + +\begin{itemize} +\item a set of states +\item one of these states is the start state +\item some states are accepting states, and +\item there is transition function\medskip + +\small +which takes a state as argument and a character and produces a new state\smallskip\\ +this function might not always be defined +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides04.pdf Binary file slides/slides04.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides04.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides04.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,612 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + 
+\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 04, King's College London, 17.~October 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (4)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Last Week\end{tabular}} + +Last week I showed you\bigskip + +\begin{itemize} +\item a tokenizer taking a list of regular expressions\bigskip + +\item tokenization identifies lexeme in an input stream of characters (or string) +and cathegorizes them into tokens + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Two Rules\end{tabular}} + +\begin{itemize} +\item Longest match rule (maximal munch rule): The +longest initial substring matched by any regular expression is taken +as next token.\bigskip + +\item Rule priority: +For a particular longest initial substring, the first regular +expression that can match determines the token. 
+ +\end{itemize} + +%\url{http://www.technologyreview.com/tr10/?year=2011} + +%finite deterministic automata/ nondeterministic automaton + +%\item problem with infix operations, for example i-12 + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\mode{ +\begin{frame}[t] + +\begin{center} +\texttt{"if true then then 42 else +"} +\end{center} + + +\begin{tabular}{@{}l} +KEYWORD: \\ +\hspace{5mm}\texttt{"if"}, \texttt{"then"}, \texttt{"else"},\\ +WHITESPACE:\\ +\hspace{5mm}\texttt{" "}, \texttt{"$\backslash$n"},\\ +IDENT:\\ +\hspace{5mm}LETTER $\cdot$ (LETTER + DIGIT + \texttt{"\_"})$^*$\\ +NUM:\\ +\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + \texttt{"0"}\\ +OP:\\ +\hspace{5mm}\texttt{"+"}\\ +COMMENT:\\ +\hspace{5mm}\texttt{"$\slash$*"} $\cdot$ (ALL$^*$ $\cdot$ \texttt{"*$\slash$"} $\cdot$ ALL$^*$) $\cdot$ \texttt{"*$\slash$"} +\end{tabular} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] + +\begin{center} +\texttt{"if true then then 42 else +"} +\end{center} + +\only<1>{ +\small\begin{tabular}{l} +KEYWORD(if),\\ +WHITESPACE,\\ +IDENT(true),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +NUM(42),\\ +WHITESPACE,\\ +KEYWORD(else),\\ +WHITESPACE,\\ +OP(+) +\end{tabular}} + +\only<2>{ +\small\begin{tabular}{l} +KEYWORD(if),\\ +IDENT(true),\\ +KEYWORD(then),\\ +KEYWORD(then),\\ +NUM(42),\\ +KEYWORD(else),\\ +OP(+) +\end{tabular}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + + +There is one small problem with the tokenizer. How should we +tokenize: + +\begin{center} +\texttt{"x - 3"} +\end{center} + +\begin{tabular}{@{}l} +OP:\\ +\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\ +NUM:\\ +\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + \texttt{"0"}\\ +NUMBER:\\ +\hspace{5mm}NUM + (\texttt{"-"} $\cdot$ NUM)\\ +\end{tabular} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Negation\end{tabular}} + +Assume you have an alphabet consisting of the letters \bl{a}, \bl{b} and \bl{c} only. +Find a regular expression that matches all strings \emph{except} \bl{ab}, \bl{ac} and \bl{cba}. 
+ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Deterministic Finite Automata\end{tabular}} + +A deterministic finite automaton consists of: + +\begin{itemize} +\item a finite set of states +\item one of these states is the start state +\item some states are accepting states, and +\item there is transition function\medskip + +\small +which takes a state and a character as arguments and produces a new state\smallskip\\ +this function might not always be defined everywhere +\end{itemize} + +\begin{center} +\bl{$A(Q, q_0, F, \delta)$} +\end{center} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.7]{pics/ch3.jpg} +\end{center}\pause + +\begin{itemize} +\item start can be an accepting state +\item it is possible that there is no accepting state +\item all states might be accepting (but does not necessarily mean all strings are accepted) +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.7]{pics/ch3.jpg} +\end{center} + +for this automaton \bl{$\delta$} is the function\\ + +\begin{center} +\begin{tabular}{lll} +\bl{(q$_0$, a) $\rightarrow$ q$_1$} & \bl{(q$_1$, a) $\rightarrow$ q$_4$} & \bl{(q$_4$, a) $\rightarrow$ q$_4$}\\ +\bl{(q$_0$, b) $\rightarrow$ q$_2$} & \bl{(q$_1$, b) $\rightarrow$ q$_2$} & \bl{(q$_4$, b) $\rightarrow$ q$_4$}\\ +\end{tabular}\ldots +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Accepting a String\end{tabular}} + +Given + +\begin{center} +\bl{$A(Q, q_0, F, \delta)$} +\end{center} + +you can define + +\begin{center} +\begin{tabular}{l} +\bl{$\hat{\delta}(q, \texttt{""}) = q$}\\ +\bl{$\hat{\delta}(q, c::s) = \hat{\delta}(\delta(q, c), s)$}\\ +\end{tabular} +\end{center}\pause + +Whether a string \bl{$s$} is accepted by \bl{$A$}? 
+ +\begin{center} +\hspace{5mm}\bl{$\hat{\delta}(q_0, s) \in F$} +\end{center} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Non-Deterministic\\[-1mm] Finite Automata\end{tabular}} + +A non-deterministic finite automaton consists again of: + +\begin{itemize} +\item a finite set of states +\item one of these states is the start state +\item some states are accepting states, and +\item there is transition \alert{relation}\medskip +\end{itemize} + + +\begin{center} +\begin{tabular}{c} +\bl{(q$_1$, a) $\rightarrow$ q$_2$}\\ +\bl{(q$_1$, a) $\rightarrow$ q$_3$}\\ +\end{tabular} +\hspace{10mm} +\begin{tabular}{c} +\bl{(q$_1$, $\epsilon$) $\rightarrow$ q$_2$}\\ +\end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.7]{pics/ch5.jpg} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tabular}[b]{ll} +\bl{$\varnothing$} & \includegraphics[scale=0.7]{pics/NULL.jpg}\\\\ +\bl{$\epsilon$} & \includegraphics[scale=0.7]{pics/epsilon.jpg}\\\\ +\bl{c} & \includegraphics[scale=0.7]{pics/char.jpg}\\ +\end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tabular}[t]{ll} +\bl{r$_1$ $\cdot$ r$_2$} & \includegraphics[scale=0.6]{pics/seq.jpg}\\\\ +\end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tabular}[t]{ll} +\bl{r$_1$ + r$_2$} & \includegraphics[scale=0.7]{pics/alt.jpg}\\\\ +\end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tabular}[b]{ll} +\bl{r$^*$} & \includegraphics[scale=0.7]{pics/star.jpg}\\ +\end{tabular} +\end{center}\pause\bigskip + +Why can't we just have an epsilon transition from the accepting states to the starting state? 
+ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Subset Construction\end{tabular}} + + +\begin{textblock}{5}(1,2.5) +\includegraphics[scale=0.5]{pics/ch5.jpg} +\end{textblock} + +\begin{textblock}{11}(6.5,4.5) +\begin{tabular}{r|cl} +& a & b\\ +\hline +$\varnothing$ \onslide<2>{\textcolor{white}{*}} & $\varnothing$ & $\varnothing$\\ +$\{0\}$ \onslide<2>{\textcolor{white}{*}} & $\{0,1,2\}$ & $\{2\}$\\ +$\{1\}$ \onslide<2>{\textcolor{white}{*}} &$\{1\}$ & $\varnothing$\\ +$\{2\}$ \onslide<2>{*} & $\varnothing$ &$\{2\}$\\ +$\{0,1\}$ \onslide<2>{\textcolor{white}{*}} &$\{0,1,2\}$ &$\{2\}$\\ +$\{0,2\}$ \onslide<2>{*}&$\{0,1,2\}$ &$\{2\}$\\ +$\{1,2\}$ \onslide<2>{*}& $\{1\}$ & $\{2\}$\\ +\onslide<2>{s:} $\{0,1,2\}$ \onslide<2>{*}&$\{0,1,2\}$ &$\{2\}$\\ +\end{tabular} +\end{textblock} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Regular Languages\end{tabular}} + +A language is \alert{regular} iff there exists +a regular expression that recognises all its strings.\bigskip\medskip + +or equivalently\bigskip\medskip + +A language is \alert{regular} iff there exists +a deterministic finite automaton that recognises all its strings.\bigskip\pause + +Why is every finite set of strings a regular language? +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.5]{pics/ch3.jpg} +\end{center} + +\begin{center} +\includegraphics[scale=0.5]{pics/ch4.jpg}\\ +minimal automaton +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{enumerate} +\item Take all pairs \bl{(q, p)} with \bl{q $\not=$ p} +\item Mark all pairs that accepting and non-accepting states +\item For all unmarked pairs \bl{(q, p)} and all characters \bl{c} tests wether +\begin{center} +\bl{($\delta$(q,c), $\delta$(p,c))} +\end{center} +are marked. If yes, then also mark \bl{(q, p)} +\item Repeat last step until no chance. +\item All unmarked pairs can be merged. 
+\end{enumerate} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Given the function + +\begin{center} +\bl{\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} +$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ +$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ +$rev(c)$ & $\dn$ & $c$\\ +$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ +$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ +$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ +\end{tabular}} +\end{center} + + +and the set + +\begin{center} +\bl{$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$} +\end{center} + +prove whether + +\begin{center} +\bl{$L(rev(r)) = Rev (L(r))$} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{itemize} +\item The star-case in our proof about the matcher needs the following lemma +\begin{center} +\bl{Der\,c\,A$^*$ $=$ (Der c A)\,@\, A$^*$} +\end{center} +\end{itemize}\bigskip\bigskip + +\begin{itemize} +\item If \bl{\texttt{""} $\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B $\cup$ (Der\,c\,B)}\medskip +\item If \bl{\texttt{""} $\not\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B} + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{itemize} +\item Assuming you have the alphabet \bl{\{a, b, c\}}\bigskip +\item Give a regular expression that can recognise all strings that have at least one \bl{b}. +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +``I hate coding. 
I do not want to look at code.'' + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides05.pdf Binary file slides/slides05.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides05.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides05.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,504 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 05, King's College London, 24.~October 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (5)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Deterministic Finite Automata\end{tabular}} + +A DFA \bl{$A(Q, q_0, F, \delta)$} consists of: + +\begin{itemize} +\item a finite set of states \bl{$Q$} +\item one of these states is the start 
state \bl{$q_0$} +\item some states are accepting states \bl{$F$} +\item a transition function \bl{$\delta$} +\end{itemize}\pause + +\onslide<2->{ +\begin{center} +\begin{tabular}{l} +\bl{$\hat{\delta}(q, \texttt{""}) = q$}\\ +\bl{$\hat{\delta}(q, c\!::\!s) = \hat{\delta}(\delta(q, c), s)$} +\end{tabular} +\end{center}} + +\only<3,4>{ +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \node[state, initial] (q02) at ( 0,1) {$q_{0}$}; + \node[state] (q13) at ( 1,1) {$q_{1}$}; + \node[state, accepting] (q4) at ( 2,1) {$q_2$}; + \path[->] (q02) edge[bend left] node[above] {$a$} (q13) + (q13) edge[bend left] node[below] {$b$} (q02) + (q13) edge node[above] {$a$} (q4) + (q02) edge [loop below] node {$b$} () + (q4) edge [loop right] node {$a, b$} () + ; +\end{tikzpicture} +\end{center}}% +% +\only<5>{ +\begin{center} +\bl{$L(A) \dn \{ s \;|\; \hat{\delta}(q_0, s) \in F\}$} +\end{center}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Non-Deterministic\\[-1mm] Finite Automata\end{tabular}} + +An NFA \bl{$A(Q, q_0, F, \delta)$} consists again of: + +\begin{itemize} +\item a finite set of states +\item one of these states is the start state +\item some states are accepting states +\item a transition \alert{relation}\medskip +\end{itemize} + + +\begin{center} +\begin{tabular}{c} +\bl{(q$_1$, a) $\rightarrow$ q$_2$}\\ +\bl{(q$_1$, a) $\rightarrow$ q$_3$}\\ +\end{tabular} +\hspace{10mm} +\begin{tabular}{c} +\bl{(q$_1$, $\epsilon$) $\rightarrow$ q$_2$}\\ +\end{tabular} +\end{center}\pause\medskip + +A string \bl{s} is accepted by an NFA, if there is a ``lucky'' sequence to an accepting state. + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Last Week\end{tabular}} + +Last week I showed you\bigskip + +\begin{itemize} +\item an algorithm for automata minimisation + +\item an algorithm for transforming a regular expression into an NFA + +\item an algorithm for transforming an NFA into a DFA (subset construction) + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}This Week\end{tabular}} + +Go over the algorithms again, but with two new things and \ldots\medskip + +\begin{itemize} +\item with the example: what is the regular expression that accepts every string, except those ending +in \bl{aa}?\medskip + +\item Go over the proof for \bl{$L(rev(r)) = Rev(L(r))$}.\medskip + +\item Anything else so far. +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Proofs By Induction\end{tabular}} + +\begin{itemize} +\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip +\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip +\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already +holds for \bl{r$_1$} and \bl{r$_2$}. 
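+% The function rev in the property P(r) below is the reversal function on
+% regular expressions defined earlier.  Written out in Scala over one possible
+% Rexp datatype it could look as follows; the constructor names are
+% assumptions for this sketch, not necessarily those of the lecture code.
+%
+%   abstract class Rexp
+%   case object NULL extends Rexp                  // matches nothing
+%   case object EMPTY extends Rexp                 // matches only ""
+%   case class CHAR(c: Char) extends Rexp
+%   case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+%   case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+%   case class STAR(r: Rexp) extends Rexp
+%
+%   def rev(r: Rexp): Rexp = r match {
+%     case NULL        => NULL
+%     case EMPTY       => EMPTY
+%     case CHAR(c)     => CHAR(c)
+%     case ALT(r1, r2) => ALT(rev(r1), rev(r2))
+%     case SEQ(r1, r2) => SEQ(rev(r2), rev(r1))
+%     case STAR(r1)    => STAR(rev(r1))
+%   }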
+\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already
+holds for \bl{r}.
+\end{itemize}
+
+\begin{center}
+\bl{$P(r):\;\;L(rev(r)) = Rev(L(r))$}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+
+What is the regular expression that accepts every string, except those ending
+in \bl{aa}?\pause\bigskip
+
+\begin{center}
+\begin{tabular}{l}
+\bl{(a + b)$^*$ba}\\
+\bl{(a + b)$^*$ab}\\
+\bl{(a + b)$^*$bb}\\\pause
+\bl{a}\\
+\bl{\texttt{""}}
+\end{tabular}
+\end{center}\pause
+
+What are the strings to be avoided?\pause\medskip
+
+\begin{center}
+\bl{(a + b)$^*$aa}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+
+An NFA for \bl{(a + b)$^*$aa}
+
+\begin{center}
+\begin{tikzpicture}[scale=2, line width=0.5mm]
+  \node[state, initial] (q0) at ( 0,1) {$q_0$};
+  \node[state] (q1) at ( 1,1) {$q_1$};
+  \node[state, accepting] (q2) at ( 2,1) {$q_2$};
+  \path[->] (q0) edge node[above] {$a$} (q1)
+            (q1) edge node[above] {$a$} (q2)
+            (q0) edge [loop below] node {$a$} ()
+            (q0) edge [loop above] node {$b$} ()
+            ;
+\end{tikzpicture}
+\end{center}\pause
+
+Minimisation for DFAs\\
+Subset Construction for NFAs
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}DFA Minimisation\end{tabular}}
+
+
+\begin{enumerate}
+\item Take all pairs \bl{(q, p)} with \bl{q $\not=$ p}
+\item Mark all pairs that consist of an accepting and a non-accepting state
+\item For all unmarked pairs \bl{(q, p)} and all characters \bl{c} test whether
+\begin{center}
+\bl{($\delta$(q,c), $\delta$(p,c))}
+\end{center}
+are marked. If yes, then also mark \bl{(q, p)}.
+\item Repeat the last step until nothing changes.
+\item All unmarked pairs can be merged.
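+% A compact Scala sketch of the marking algorithm above, for a DFA given by a
+% set of (integer) states, the accepting states, the alphabet and a total
+% transition function.  All names are made up for this illustration; it
+% sketches the idea, not the lecture's implementation.
+%
+%   def mergeablePairs(states: Set[Int], fin: Set[Int], chars: Set[Char],
+%                      delta: (Int, Char) => Int): Set[(Int, Int)] = {
+%     def norm(a: Int, b: Int) = (a min b, a max b)           // order a pair
+%     val allPairs = for (q <- states; p <- states if q < p) yield (q, p)
+%     // step 2: mark pairs of an accepting and a non-accepting state
+%     var marked = allPairs.filter { case (q, p) => fin(q) != fin(p) }
+%     // steps 3 and 4: keep marking until nothing changes
+%     var changed = true
+%     while (changed) {
+%       val more = for ((q, p) <- allPairs -- marked; c <- chars
+%                       if marked(norm(delta(q, c), delta(p, c)))) yield (q, p)
+%       changed = more.nonEmpty
+%       marked = marked ++ more
+%     }
+%     allPairs -- marked                         // step 5: these can be merged
+%   }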
+\end{enumerate} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Minimal DFA \only<1>{\bl{(a + b)$^*$aa}}\only<2->{\alert{not} \bl{(a + b)$^*$aa}} + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \only<1>{\node[state, initial] (q0) at ( 0,1) {$q_0$};} + \only<2->{\node[state, initial,accepting] (q0) at ( 0,1) {$q_0$};} + \only<1>{\node[state] (q1) at ( 1,1) {$q_1$};} + \only<2->{\node[state,accepting] (q1) at ( 1,1) {$q_1$};} + \only<1>{\node[state, accepting] (q2) at ( 2,1) {$q_2$};} + \only<2->{\node[state] (q2) at ( 2,1) {$q_2$};} + \path[->] (q0) edge[bend left] node[above] {$a$} (q1) + (q1) edge[bend left] node[above] {$b$} (q0) + (q2) edge[bend left=50] node[below] {$b$} (q0) + (q1) edge node[above] {$a$} (q2) + (q2) edge [loop right] node {$a$} () + (q0) edge [loop below] node {$b$} () + ; +\end{tikzpicture} +\end{center} + +\onslide<3>{How to get from a DFA to a regular expression?} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \only<1->{\node[state, initial] (q0) at ( 0,1) {$q_0$};} + \only<1->{\node[state] (q1) at ( 1,1) {$q_1$};} + \only<1->{\node[state] (q2) at ( 2,1) {$q_2$};} + \path[->] (q0) edge[bend left] node[above] {$a$} (q1) + (q1) edge[bend left] node[above] {$b$} (q0) + (q2) edge[bend left=50] node[below] {$b$} (q0) + (q1) edge node[above] {$a$} (q2) + (q2) edge [loop right] node {$a$} () + (q0) edge [loop below] node {$b$} () + ; +\end{tikzpicture} +\end{center}\pause\bigskip + +\onslide<2->{ +\begin{center} +\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} +\bl{$q_0$} & \bl{$=$} & \bl{$2\, q_0 + 3 \,q_1 + 4\, q_2$}\\ +\bl{$q_1$} & \bl{$=$} & \bl{$2 \,q_0 + 3\, q_1 + 1\, q_2$}\\ +\bl{$q_2$} & \bl{$=$} & \bl{$1\, q_0 + 5\, q_1 + 2\, q_2$}\\ + +\end{tabular} +\end{center} +} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \only<1->{\node[state, initial] (q0) at ( 0,1) {$q_0$};} + \only<1->{\node[state] (q1) at ( 1,1) {$q_1$};} + \only<1->{\node[state] (q2) at ( 2,1) {$q_2$};} + \path[->] (q0) edge[bend left] node[above] {$a$} (q1) + (q1) edge[bend left] node[above] {$b$} (q0) + (q2) edge[bend left=50] node[below] {$b$} (q0) + (q1) edge node[above] {$a$} (q2) + (q2) edge [loop right] node {$a$} () + (q0) edge [loop below] node {$b$} () + ; +\end{tikzpicture} +\end{center}\bigskip + +\onslide<2->{ +\begin{center} +\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} +\bl{$q_0$} & \bl{$=$} & \bl{$\epsilon + q_0\,b + q_1\,b + q_2\,b$}\\ +\bl{$q_1$} & \bl{$=$} & \bl{$q_0\,a$}\\ +\bl{$q_2$} & \bl{$=$} & \bl{$q_1\,a + q_2\,a$}\\ + +\end{tabular} +\end{center} +} + +\onslide<3->{ +Arden's Lemma: +\begin{center} +If \bl{$q = q\,r + s$}\; then\; \bl{$q = s\, r^*$} +\end{center} +} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Algorithms on Automata\end{tabular}} + + +\begin{itemize} +\item Reg $\rightarrow$ NFA: Thompson-McNaughton-Yamada method\medskip +\item NFA 
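+% For concreteness, the equation system above can be solved with Arden's lemma
+% as follows (one possible calculation; e stands for the empty-string regex):
+%
+%   q2 = q1 a + q2 a      ==Arden==>  q2 = q1 a a*
+%   q1 = q0 a                         (substitute q1 and q2 into q0)
+%   q0 = e + q0 b + q0 a b + q0 a a a* b
+%      = e + q0 (b + a b + a a a* b)  ==Arden==>  q0 = (b + a b + a a a* b)*
+%
+% Taking the accepting states q0 and q1 of the complemented DFA together, a
+% regular expression for ``every string except those ending in aa'' is
+%
+%   q0 + q1 = (b + a b + a a a* b)* (e + a)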
$\rightarrow$ DFA: Subset Construction\medskip +\item DFA $\rightarrow$ Reg: Brzozowski's Algebraic Method\medskip +\item DFA minimisation: Hopcrofts Algorithm\medskip +\item complement DFA +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\newcommand{\qq}{\mbox{\texttt{"}}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Grammars\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F + (F \cdot \qq*\qq \cdot F) + (F \cdot \qq\backslash\qq \cdot F)$\\ +$F$ & $\rightarrow$ & $T + (T \cdot \qq\texttt{+}\qq \cdot T) + (T \cdot \qq\texttt{-}\qq \cdot T)$\\ +$T$ & $\rightarrow$ & $num + (\qq\texttt{(}\qq \cdot E \cdot \qq\texttt{)}\qq)$\\ +\end{tabular}} +\end{center} + +\bl{$E$}, \bl{$F$} and \bl{$T$} are non-terminals\\ +\bl{$E$} is start symbol\\ +\bl{$num$}, \bl{(}, \bl{)}, \bl{+} \ldots are terminals\bigskip\\ + + +\bl{\texttt{(2*3)+(3+4)}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F + (F \cdot \qq*\qq \cdot F) + (F \cdot \qq\backslash\qq \cdot F)$\\ +$F$ & $\rightarrow$ & $T + (T \cdot \qq\texttt{+}\qq \cdot T) + (T \cdot \qq\texttt{-}\qq \cdot T)$\\ +$T$ & $\rightarrow$ & $num + (\qq\texttt{(}\qq \cdot E \cdot \qq\texttt{)}\qq)$\\ +\end{tabular}} +\end{center} + +\begin{center} +\begin{tikzpicture}[level distance=8mm, blue] + \node {E} + child {node {F} + child {node {T} + child {node {\qq(\qq\,E\,\qq)\qq} + child {node{F \qq*\qq{} F} + child {node {T} child {node {2}}} + child {node {T} child {node {3}}} + } + } + } + child {node {\qq+\qq}} + child {node {T} + child {node {\qq(\qq\,E\,\qq)\qq} + child {node {F} + child {node {T \qq+\qq{} T} + child {node {3}} + child {node {4}} + } + }} + }}; +\end{tikzpicture} +\end{center} + +\begin{textblock}{5}(1, 5) +\bl{\texttt{(2*3)+(3+4)}} +\end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides06.pdf Binary file slides/slides06.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides06.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides06.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,579 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usetikzlibrary{plotmarks} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + 
morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 06, King's College London, 31.~October 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + + +% The data files, written on the first run. +\begin{filecontents}{re-python.data} +1 0.029 +5 0.029 +10 0.029 +15 0.032 +16 0.042 +17 0.042 +18 0.055 +19 0.084 +20 0.136 +21 0.248 +22 0.464 +23 0.899 +24 1.773 +25 3.505 +26 6.993 +27 14.503 +28 29.307 +#29 58.886 +\end{filecontents} + +\begin{filecontents}{re1.data} +1 0.00179 +2 0.00011 +3 0.00014 +4 0.00026 +5 0.00050 +6 0.00095 +7 0.00190 +8 0.00287 +9 0.00779 +10 0.01399 +11 0.01894 +12 0.03666 +13 0.07994 +14 0.08944 +15 0.02377 +16 0.07392 +17 0.22798 +18 0.65310 +19 2.11360 +20 6.31606 +21 21.46013 +\end{filecontents} + +\begin{filecontents}{re2.data} +1 0.00240 +2 0.00013 +3 0.00020 +4 0.00030 +5 0.00049 +6 0.00083 +7 0.00146 +8 0.00228 +9 0.00351 +10 0.00640 +11 0.01217 +12 0.02565 +13 0.01382 +14 0.02423 +15 0.05065 +16 0.06522 +17 0.02140 +18 0.05176 +19 0.18254 +20 0.51898 +21 1.39631 +22 2.69501 +23 8.07952 +\end{filecontents} + +\begin{filecontents}{re-internal.data} +1 0.00069 +301 0.00700 +601 0.00297 +901 0.00470 +1201 0.01301 +1501 0.01175 +1801 0.01761 +2101 0.01787 +2401 0.02717 +2701 0.03932 +3001 0.03470 +3301 0.04349 +3601 0.05411 +3901 0.06181 +4201 0.07119 +4501 0.08578 +\end{filecontents} + +\begin{filecontents}{re3.data} +1 0.001605 +501 0.131066 +1001 0.057885 +1501 0.136875 +2001 0.176238 +2501 0.254363 +3001 0.37262 +3501 0.500946 +4001 0.638384 +4501 0.816605 +5001 1.00491 +5501 1.232505 +6001 1.525672 +6501 1.757502 +7001 2.092784 +7501 2.429224 +8001 2.803037 +8501 3.463045 +9001 3.609 +9501 4.081504 +10001 4.54569 +\end{filecontents} +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (6)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +``I hate coding. 
I do not want to look at code.'' + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +``I am appalled. You do not show code anymore.'' + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}ReDoS\end{tabular}} + +\begin{itemize} +\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice\bigskip +\item ``Regular Expressions Will Stab You in the Back''\bigskip +\item Evil regular expressions\medskip +\begin{itemize} +\item \bl{$(a?\{n\})a\{n\}$} +\item \bl{$(a^+)^+$} +\item \bl{$([a-zA-Z]^+)^*$} +\item \bl{$(a + aa)^+$} +\item \bl{$(a + a?)^+$} +\end{itemize} +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Regexp Matching\end{tabular}} + +Given a regular expression + +\begin{enumerate} +\item you might convert it into a DFA (subset construction) +\item you might try all possible paths in an NFA via backtracking +\item you might try all paths in an NFA in parallel +\item you might try to convert the DFA ``lazily'' +\end{enumerate}\bigskip + +Often No~2 is implemented (sometimes there are even good reasons for doing this). + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}\bl{$(a?\{n\})a\{n\}$} in Python\end{tabular}} + +\begin{tikzpicture}[y=.2cm, x=.3cm] + %axis + \draw (0,0) -- coordinate (x axis mid) (30,0); + \draw (0,0) -- coordinate (y axis mid) (0,30); + %ticks + \foreach \x in {0,5,...,30} + \draw (\x,1pt) -- (\x,-3pt) + node[anchor=north] {\x}; + \foreach \y in {0,5,...,30} + \draw (1pt,\y) -- (-3pt,\y) + node[anchor=east] {\y}; + %labels + \node[below=0.6cm] at (x axis mid) {\bl{a}s}; + \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; + + %plots + \draw[color=blue] plot[mark=*, mark options={fill=white}] + file {re-python.data}; + \only<2->{ + \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] + file {re1.data};} + \only<3->{ + \draw[color=green] plot[mark=square*, mark options={fill=white} ] + file {re2.data};} + + %legend + \begin{scope}[shift={(4,20)}] + \draw[color=blue] (0,0) -- + plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Python}; + \only<2->{\draw[yshift=\baselineskip, color=red] (0,0) -- + plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V1};} + \only<3->{ + \draw[yshift=2\baselineskip, color=green] (0,0) -- + plot[mark=square*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V2 with simplifications};} + \end{scope} +\end{tikzpicture} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + + +\begin{tikzpicture}[y=.7cm, x=.0009cm] + %axis + \draw (0,0) -- coordinate (x axis mid) (10000,0); + \draw (0,0) -- coordinate (y axis mid) (0,6); + %ticks + \foreach \x in {0,2000,...,10000} + \draw (\x,1pt) -- (\x,-3pt) + node[anchor=north] {\x}; + \foreach \y in {0,1,...,6} + \draw 
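+% For reference, a minimal derivative-based regular-expression matcher of the
+% kind used in the lectures; this is only a sketch, and the programs actually
+% measured in these plots (Scala V1/V2) may differ in detail.
+%
+%   abstract class Rexp
+%   case object NULL extends Rexp
+%   case object EMPTY extends Rexp
+%   case class CHAR(c: Char) extends Rexp
+%   case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+%   case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+%   case class STAR(r: Rexp) extends Rexp
+%
+%   def nullable(r: Rexp): Boolean = r match {
+%     case NULL => false
+%     case EMPTY => true
+%     case CHAR(_) => false
+%     case ALT(r1, r2) => nullable(r1) || nullable(r2)
+%     case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+%     case STAR(_) => true
+%   }
+%
+%   def der(c: Char, r: Rexp): Rexp = r match {
+%     case NULL => NULL
+%     case EMPTY => NULL
+%     case CHAR(d) => if (c == d) EMPTY else NULL
+%     case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+%     case SEQ(r1, r2) =>
+%       if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+%       else SEQ(der(c, r1), r2)
+%     case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+%   }
+%
+%   def matches(r: Rexp, s: String): Boolean =
+%     nullable(s.foldLeft(r)((r1, c) => der(c, r1)))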
(1pt,\y) -- (-3pt,\y) + node[anchor=east] {\y}; + %labels + \node[below=0.6cm] at (x axis mid) {\bl{a}s}; + \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; + + %plots + \draw[color=blue] plot[mark=*, mark options={fill=white}] + file {re-internal.data}; + \only<2->{ + \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] + file {re3.data};} + + %legend + \begin{scope}[shift={(2000,4)}] + \draw[color=blue] (0,0) -- + plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala Internal}; + \only<2->{ + \draw[yshift=\baselineskip, color=red] (0,0) -- + plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V3 with explicit $\_\{\_\}$};} + \end{scope} +\end{tikzpicture} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +\newcommand{\qq}{\mbox{\texttt{"}}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Grammars\end{tabular}} + +A (context-free) grammar \bl{$G$} consists of + +\begin{itemize} +\item a finite set of nonterminal symbols (upper case) +\item a finite terminal symbols or tokens (lower case) +\item a start symbol (which must be a nonterminal) +\item a set of rules +\begin{center} +\bl{$A \rightarrow \text{rhs}$} +\end{center} + +where \bl{rhs} are sequences involving terminals and nonterminals.\medskip\pause + +We can also allow rules +\begin{center} +\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} +\end{center} +\end{itemize} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Palindromes\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $\epsilon$ \\ +$S$ & $\rightarrow$ & $a\cdot S\cdot a$ \\ +$S$ & $\rightarrow$ & $b\cdot S\cdot b$ \\ +\end{tabular}} +\end{center}\pause + +or + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ +\end{tabular}} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Arithmetic Expressions\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center}\pause + +\bl{\texttt{1 + 2 * 3 + 4}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Parse Trees\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\ +$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\ +$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\ +\end{tabular}} +\end{center} + +\begin{center} +\begin{tikzpicture}[level distance=8mm, blue] + \node {$E$} + child {node {$F$} + child {node {$T$} + child {node {(\,$E$\,)} + child {node{$F$ *{} $F$} + child {node {$T$} child {node {2}}} + child {node {$T$} child {node {3}}} + } + } + } + child {node {+}} + child {node 
{$T$} + child {node {(\,$E$\,)} + child {node {$F$} + child {node {$T$ +{} $T$} + child {node {3}} + child {node {4}} + } + }} + }}; +\end{tikzpicture} +\end{center} + +\begin{textblock}{5}(1, 6.5) +\bl{\texttt{(2*3)+(3+4)}} +\end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}} + +A grammar is \alert{ambiguous} if there is a string that has at least parse trees. + + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center} + +\bl{\texttt{1 + 2 * 3 + 4}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Chomsky Normal Form\end{tabular}} + +All rules must be of the form + +\begin{center} +\bl{$A \rightarrow a$} +\end{center} + +or + +\begin{center} +\bl{$A \rightarrow B\cdot C$} +\end{center} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} + + +\begin{center} +\bl{\begin{tabular}{@ {}lcl} +$S$ & $\rightarrow$ & $N\cdot P$ \\ +$P$ & $\rightarrow$ & $V\cdot N$ \\ +$N$ & $\rightarrow$ & $N\cdot N$ \\ +$N$ & $\rightarrow$ & $\texttt{students} \;|\; \texttt{Jeff} \;|\; \texttt{geometry} \;|\; \texttt{trains} $ \\ +$V$ & $\rightarrow$ & $\texttt{trains}$ +\end{tabular}} +\end{center} + +\bl{\texttt{Jeff trains geometry students}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} + + +\begin{itemize} +\item runtime is \bl{$O(n^3)$}\bigskip +\item grammars need to be transferred into CNF +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides07.pdf Binary file slides/slides07.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides07.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides07.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,542 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usetikzlibrary{plotmarks} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + 
+\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 07, King's College London, 14.~November 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + + +% The data files, written on the first run. +\begin{filecontents}{re-python.data} +1 0.029 +5 0.029 +10 0.029 +15 0.032 +16 0.042 +17 0.042 +18 0.055 +19 0.084 +20 0.136 +21 0.248 +22 0.464 +23 0.899 +24 1.773 +25 3.505 +26 6.993 +27 14.503 +28 29.307 +#29 58.886 +\end{filecontents} + +\begin{filecontents}{re-ruby.data} +1 0.00006 +2 0.00003 +3 0.00001 +4 0.00001 +5 0.00001 +6 0.00002 +7 0.00002 +8 0.00004 +9 0.00007 +10 0.00013 +11 0.00026 +12 0.00055 +13 0.00106 +14 0.00196 +15 0.00378 +16 0.00764 +17 0.01606 +18 0.03094 +19 0.06508 +20 0.12420 +21 0.25393 +22 0.51449 +23 1.02174 +24 2.05998 +25 4.22514 +26 8.42479 +27 16.88678 +28 34.79653 +\end{filecontents} + +\begin{filecontents}{re1.data} +1 0.00179 +2 0.00011 +3 0.00014 +4 0.00026 +5 0.00050 +6 0.00095 +7 0.00190 +8 0.00287 +9 0.00779 +10 0.01399 +11 0.01894 +12 0.03666 +13 0.07994 +14 0.08944 +15 0.02377 +16 0.07392 +17 0.22798 +18 0.65310 +19 2.11360 +20 6.31606 +21 21.46013 +\end{filecontents} + +\begin{filecontents}{re2.data} +1 0.00240 +2 0.00013 +3 0.00020 +4 0.00030 +5 0.00049 +6 0.00083 +7 0.00146 +8 0.00228 +9 0.00351 +10 0.00640 +11 0.01217 +12 0.02565 +13 0.01382 +14 0.02423 +15 0.05065 +16 0.06522 +17 0.02140 +18 0.05176 +19 0.18254 +20 0.51898 +21 1.39631 +22 2.69501 +23 8.07952 +\end{filecontents} + +\begin{filecontents}{re-internal.data} +1 0.00069 +301 0.00700 +601 0.00297 +901 0.00470 +1201 0.01301 +1501 0.01175 +1801 0.01761 +2101 0.01787 +2401 0.02717 +2701 0.03932 +3001 0.03470 +3301 0.04349 +3601 0.05411 +3901 0.06181 +4201 0.07119 +4501 0.08578 +\end{filecontents} + +\begin{filecontents}{re3.data} +1 0.001605 +501 0.131066 +1001 0.057885 +1501 0.136875 +2001 0.176238 +2501 0.254363 +3001 0.37262 +3501 0.500946 +4001 0.638384 +4501 0.816605 +5001 1.00491 +5501 1.232505 +6001 1.525672 +6501 1.757502 +7001 2.092784 +7501 2.429224 +8001 2.803037 +8501 3.463045 +9001 3.609 +9501 4.081504 +10001 4.54569 +\end{filecontents} +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal 
Languages (7)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}\bl{$(a?\{n\})a\{n\}$}\end{tabular}} + +\mbox{}\\[-13mm] + +\begin{tikzpicture}[y=.2cm, x=.3cm] + %axis + \draw (0,0) -- coordinate (x axis mid) (30,0); + \draw (0,0) -- coordinate (y axis mid) (0,30); + %ticks + \foreach \x in {0,5,...,30} + \draw (\x,1pt) -- (\x,-3pt) + node[anchor=north] {\x}; + \foreach \y in {0,5,...,30} + \draw (1pt,\y) -- (-3pt,\y) + node[anchor=east] {\y}; + %labels + \node[below=0.6cm] at (x axis mid) {\bl{a}s}; + \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; + + %plots + \draw[color=blue] plot[mark=*, mark options={fill=white}] + file {re-python.data}; + \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] + file {re1.data}; + \draw[color=green] plot[mark=square*, mark options={fill=white} ] + file {re2.data}; + \draw[color=brown] plot[mark=pentagon*, mark options={fill=white} ] + file {re-ruby.data}; + + %legend + \begin{scope}[shift={(4,20)}] + \draw[color=blue] (0,0) -- + plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Python}; + \draw[yshift=-\baselineskip, color=brown] (0,0) -- + plot[mark=pentagon*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Ruby (Daniel Baldwin)}; + \draw[yshift=\baselineskip, color=red] (0,0) -- + plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V1}; + \draw[yshift=2\baselineskip, color=green] (0,0) -- + plot[mark=square*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V2 with simplifications}; + \end{scope} +\end{tikzpicture} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] + +\begin{tikzpicture}[y=.7cm, x=.0009cm] + %axis + \draw (0,0) -- coordinate (x axis mid) (10000,0); + \draw (0,0) -- coordinate (y axis mid) (0,6); + %ticks + \foreach \x in {0,2000,...,10000} + \draw (\x,1pt) -- (\x,-3pt) + node[anchor=north] {\x}; + \foreach \y in {0,1,...,6} + \draw (1pt,\y) -- (-3pt,\y) + node[anchor=east] {\y}; + %labels + \node[below=0.6cm] at (x axis mid) {\bl{a}s}; + \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; + + %plots + \draw[color=blue] plot[mark=*, mark options={fill=white}] + file {re-internal.data}; + \only<1->{ + \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] + file {re3.data};} + + %legend + \begin{scope}[shift={(2000,4)}] + \draw[color=blue] (0,0) -- + plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala Internal}; + \only<1->{ + \draw[yshift=\baselineskip, color=red] (0,0) -- + plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small Scala V3 with explicit $\_\{\_\}$};} + \end{scope} +\end{tikzpicture} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \\[-8mm] + \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ + & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ + & & \bl{else (der c r$_1$) $\cdot$ 
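+% The explicit r{n} in the definition shown in this frame can be represented
+% by an extra constructor in the Rexp datatype.  A sketch of the clauses one
+% would add to the matcher; the constructor name NTIMES is made up, the
+% remaining clauses of der and nullable stay as usual:
+%
+%   case class NTIMES(r: Rexp, n: Int) extends Rexp        // r{n}
+%
+%   // in der:      case NTIMES(r1, n) =>
+%   //                if (n == 0) NULL
+%   //                else SEQ(der(c, r1), NTIMES(r1, n - 1))
+%   // in nullable (not shown on the slide, one consistent choice):
+%   //              case NTIMES(r1, n) => if (n == 0) true else nullable(r1)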
r$_2$}\\ + \bl{der c (r$\{n\}$)} & \bl{$\dn$} & \bl{if $n = 0$ then $\varnothing$}\\ + & & \bl{else (der c r) $\cdot$ r$\{n - 1\}$} + \end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +\newcommand{\qq}{\mbox{\texttt{"}}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}CFGs\end{tabular}} + +A \alert{context-free} grammar (CFG) \bl{$G$} consists of: + +\begin{itemize} +\item a finite set of nonterminal symbols (upper case) +\item a finite terminal symbols or tokens (lower case) +\item a start symbol (which must be a nonterminal) +\item a set of rules +\begin{center} +\bl{$A \rightarrow \text{rhs}$} +\end{center} + +where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause + +We can also allow rules +\begin{center} +\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} +\end{center} +\end{itemize} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}A CFG Derivation\end{tabular}} + +\begin{enumerate} +\item Begin with a string with only the start symbol \bl{$S$}\bigskip +\item Replace any non-terminal \bl{$X$} in the string by the right-hand side of some production \bl{$X \rightarrow \text{rhs}$}\bigskip +\item Repeat 2 until there are no non-terminals +\end{enumerate} + +\begin{center} +\bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Language of a CFG\end{tabular}} + +Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. 
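+% A concrete instance of the three derivation steps above, using the
+% arithmetic grammar E -> num | E+E | E-E | E*E | (E) from the following
+% frames (writing 1, 2, 3 for num tokens):
+%
+%   E -> E + E -> 1 + E -> 1 + E * E -> 1 + 2 * E -> 1 + 2 * 3
+%
+% Starting instead with E -> E * E leads to a different parse tree for the
+% same string, which is exactly the ambiguity discussed later in these slides.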
+Then the language \bl{$L(G)$} is:
+
+\begin{center}
+\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$}
+\end{center}\pause
+
+\begin{itemize}
+\item Terminals are so-called because there are no rules for replacing them
+\item Once generated, terminals are ``permanent''
+\item Terminals ought to be tokens of the language (at least in this course)
+\end{itemize}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Arithmetic Expressions\end{tabular}}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ & $num\_token$ \\
+$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ & $( \cdot E \cdot )$
+\end{tabular}}
+\end{center}\pause\bigskip
+
+A CFG is \alert{left-recursive} if it has a nonterminal \bl{$E$} such
+that \bl{$E \rightarrow^+ E\cdot \ldots$}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Parse Trees\end{tabular}}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\
+$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\
+$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\
+\end{tabular}}
+\end{center}
+
+\begin{center}
+\begin{tikzpicture}[level distance=8mm, blue]
+  \node {$E$}
+     child {node {$F$}
+        child {node {$T$}
+           child {node {(\,$E$\,)}
+              child {node{$F$ *{} $F$}
+                child {node {$T$} child {node {2}}}
+                child {node {$T$} child {node {3}}}
+                }
+            }
+         }
+     child {node {+}}
+     child {node {$T$}
+        child {node {(\,$E$\,)}
+          child {node {$F$}
+             child {node {$T$ +{} $T$}
+               child {node {3}}
+               child {node {4}}
+              }
+          }}
+       }};
+\end{tikzpicture}
+\end{center}
+
+\begin{textblock}{5}(1, 6.5)
+\bl{\texttt{(2*3)+(3+4)}}
+\end{textblock}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}}
+
+A CFG is \alert{ambiguous} if there is a string that has at least two parse trees.
+ + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $num\_token$ \\ +$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ +$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ +$E$ & $\rightarrow$ & $( \cdot E \cdot )$ +\end{tabular}} +\end{center} + +\bl{\texttt{1 + 2 * 3 + 4}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Dangling Else\end{tabular}} + +Another ambiguous grammar:\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & if $E$ then $E$\\ + & $|$ & if $E$ then $E$ else $E$ \\ + & $|$ & id +\end{tabular}} +\end{center}\bigskip + +\bl{\texttt{if a then if x then y else c}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides08.pdf Binary file slides/slides08.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides08.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides08.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,676 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usetikzlibrary{plotmarks} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 08, King's College London, 21.~November 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + + +% The data files, written on the first run. 
+\begin{filecontents}{s-grammar1.data} +1 0.01152 +51 0.07973 +101 0.09726 +151 0.09320 +201 0.10010 +251 0.16997 +301 0.26662 +351 0.46118 +401 0.62516 +451 0.87247 +501 1.16334 +551 1.71152 +601 2.10958 +651 2.44360 +701 2.98488 +751 3.50326 +801 4.11036 +851 4.93394 +901 5.77465 +951 7.39123 +\end{filecontents} + +\begin{filecontents}{s-grammar2.data} +1 0.01280 +2 0.00064 +3 0.00173 +4 0.00355 +5 0.00965 +6 0.02674 +7 0.06953 +8 0.11166 +9 0.18707 +10 0.09189 +11 0.12724 +12 0.24337 +13 0.59304 +14 1.53594 +15 4.01195 +16 10.73582 +17 29.51587 +#18 73.14163 +\end{filecontents} + + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (8)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Building a ``Web Browser''\end{tabular}} + +Using a lexer: assume the following regular expressions + +\begin{center} +\bl{\begin{tabular}{lcl} +$SY\!M$ & $\dn$ & $(\text{a}..\text{zA}..\text{Z0}..\text{9}..)$\\ +$W\!O\!RD$ & $\dn$ & $SY\!M^+$\\ +$BT\!AG$ & $\dn$ & $<\!W\!O\!RD\!>$\\ +$ET\!AG$ & $\dn$ & $<\!/W\!O\!RD\!>$\\ +$W\!HIT\!E$ & $\dn$ & $\texttt{" "} + \texttt{"}\slash{}n\texttt{"}$\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} + +\begin{itemize} +\item the text should be formatted consistently up to a specified width, say 60 characters +\item potential linebreaks are inserted by the formatter (not the input) +\item repeated whitespaces are ``condensed'' to a single whitepace +\item \bl{$<\!p\!>$} \bl{$<\!\slash{}p\!>$} start/end paragraph +\item \bl{$<\!b\!>$} \bl{$<\!\slash{}b\!>$} start/end bold +\item \bl{$<\!red\!>$} \bl{$<\!\slash{}red\!>$} start/end red (cyan, etc) + + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} + +The lexer cannot prevent errors like + +\begin{center} +\bl{$<\!b\!>$} \ldots \bl{$<\!p\!>$} \ldots \bl{$<\!\slash{}b\!>$} \ldots \bl{$<\!\slash{}p\!>$} +\end{center} + +or + +\begin{center} +\bl{$<\!\slash{}b\!>$} \ldots \bl{$<\!b\!>$} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Parser Combinators\end{tabular}} + +Parser combinators: \bigskip + +\begin{minipage}{1.1\textwidth} +\begin{center} +\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} +$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ +\end{center} +\end{minipage}\bigskip + +\begin{itemize} +\item sequencing +\item alternative 
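+% In Scala, the input/output view of a parser combinator sketched above can be
+% captured by a type along the following lines (just an illustration; the
+% lecture's own parser classes in app7.scala need not look like this):
+%
+%   type Parser[I, T] = I => Set[(T, I)]    // input => set of (result, rest)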
+\item semantic action +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Alternative parser (code \bl{$p\;||\;q$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} and also \bl{$q$}; then combine the outputs +\end{itemize} + +\begin{center} +\large \bl{$p(\text{input}) \cup q(\text{input})$} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Sequence parser (code \bl{$p\sim q$})\bigskip + +\begin{itemize} +\item apply first \bl{$p$} producing a set of pairs +\item then apply \bl{$q$} to the unparsed parts +\item then combine the results:\\ \mbox{}\;\;((output$_1$, output$_2$), unparsed part) +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} +\end{tabular} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Function parser (code \bl{$p \Longrightarrow f$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} producing a set of pairs +\item then apply the function \bl{$f$} to each first component +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} +\end{tabular} +\end{center}\bigskip\bigskip\pause + +\bl{$f$} is the semantic action (``what to do with the parsed input'') + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Token parser:\bigskip + +\begin{itemize} +\item if the input is + +\begin{center} +\large \bl{$tok_1:: tok_2 :: \ldots :: tok_n$} +\end{center} + +then return + +\begin{center} +\large \bl{$\{(tok_1,\; tok_2 :: \ldots :: tok_n)\}$} +\end{center} + +or + +\begin{center} +\large \bl{$\{\}$} +\end{center} + +if \bl{$tok_1$} is not the right token we are looking for +\end{itemize} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Number-Token parser:\bigskip + +\begin{itemize} +\item if the input is + +\begin{center} +\large \bl{$num\_tok(42):: tok_2 :: \ldots :: tok_n$} +\end{center} + +then return + +\begin{center} +\large \bl{$\{(42,\; tok_2 :: \ldots :: tok_n)\}$} +\end{center} + +or + +\begin{center} +\large \bl{$\{\}$} +\end{center} + +if \bl{$tok_1$} is not the right token we are looking for +\end{itemize}\pause + +\begin{center} +list of tokens \bl{$\Rightarrow$} set of (\alert{int}, list of tokens) +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{itemize} +\item if the input is + +\begin{center} +\begin{tabular}{l} +\large \bl{$num\_tok(42)::$}\\ +\hspace{7mm}\large \bl{$num\_tok(3) ::$}\\ +\hspace{14mm}\large \bl{$tok_3 :: \ldots :: tok_n$} +\end{tabular} +\end{center} + +and the parser is + +\begin{center} +\bl{$ntp \sim ntp$} +\end{center} + +the successful 
output will be + +\begin{center} +\large \bl{$\{((42, 3),\; tok_2 :: \ldots :: tok_n)\}$} +\end{center}\pause + +Now we can form +\begin{center} +\bl{$(ntp \sim ntp) \Longrightarrow f$} +\end{center} + +where \bl{$f$} is the semantic action (``what to do with the pair'') + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} + +Addition + +\begin{center} +\bl{$T \sim + \sim E \Longrightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} +\end{center}\pause + +Multiplication + +\begin{center} +\bl{$F \sim * \sim T \Longrightarrow f((x,y), z) \Rightarrow x * z$} +\end{center}\pause + +Parenthesis + +\begin{center} +\bl{$\text{(} \sim E \sim \text{)} \Longrightarrow f((x,y), z) \Rightarrow y$} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Types of Parsers\end{tabular}} + +\begin{itemize} +\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, +then \bl{$p \sim q$} returns results of type + +\begin{center} +\bl{$T \times S$} +\end{center}\pause + +\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, +and \bl{$p \;||\; q$} returns results of type + +\begin{center} +\bl{$T$} +\end{center}\pause + +\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from +\bl{$T$} to \bl{$S$}, then +\bl{$p \Longrightarrow f$} returns results of type + +\begin{center} +\bl{$S$} +\end{center} + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Input Types of Parsers\end{tabular}} + +\begin{itemize} +\item input: \alert{list of tokens} +\item output: set of (output\_type, \alert{list of tokens}) +\end{itemize}\bigskip\pause + +actually it can be any input type as long as it is a kind of sequence +(for example a string) + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Scannerless Parsers\end{tabular}} + +\begin{itemize} +\item input: \alert{string} +\item output: set of (output\_type, \alert{string}) +\end{itemize}\bigskip + +but lexers are better when whitespaces or comments need to be filtered out + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Successful Parses\end{tabular}} + +\begin{itemize} +\item input: string +\item output: \alert{set of} (output\_type, string) +\end{itemize}\bigskip + +a parse is successful whenever the input has been +fully ``consumed'' (that is the second component is empty) + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +{\lstset{language=Scala}\fontsize{10}{12}\selectfont 
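+% A minimal sketch of the three combinators described in the previous frames
+% (alternative, sequencing, semantic action), in an abstract-class style; the
+% names and details are assumptions and need not match app7.scala/app8.scala,
+% which are shown on the next frames.
+%
+%   abstract class Parser[I, T] {
+%     def parse(in: I): Set[(T, I)]
+%
+%     def ||(q: => Parser[I, T]) = new Parser[I, T] {
+%       def parse(in: I) = Parser.this.parse(in) ++ q.parse(in)
+%     }
+%
+%     def ~[S](q: => Parser[I, S]) = new Parser[I, (T, S)] {
+%       def parse(in: I) =
+%         for ((o1, u1) <- Parser.this.parse(in);
+%              (o2, u2) <- q.parse(u1)) yield ((o1, o2), u2)
+%     }
+%
+%     def ==>[S](f: T => S) = new Parser[I, S] {
+%       def parse(in: I) =
+%         for ((o, u) <- Parser.this.parse(in)) yield (f(o), u)
+%     }
+%   }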
+\texttt{\lstinputlisting{app7.scala}}} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +{\lstset{language=Scala}\fontsize{10}{12}\selectfont +\texttt{\lstinputlisting{app7.scala}}} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +{\lstset{language=Scala}\fontsize{10}{12}\selectfont +\texttt{\lstinputlisting{app8.scala}}} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Two Grammars\end{tabular}} + +Which languages are recognised by the following two grammars? + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$U$ & $\rightarrow$ & $1 \cdot U$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}} + +\mbox{}\\[-25mm]\mbox{} + +\begin{center} +\begin{tikzpicture}[y=.2cm, x=.009cm] + %axis + \draw (0,0) -- coordinate (x axis mid) (1000,0); + \draw (0,0) -- coordinate (y axis mid) (0,30); + %ticks + \foreach \x in {0, 20, 100, 200,...,1000} + \draw (\x,1pt) -- (\x,-3pt) + node[anchor=north] {\small \x}; + \foreach \y in {0,5,...,30} + \draw (1pt,\y) -- (-3pt,\y) + node[anchor=east] {\small\y}; + %labels + \node[below=0.6cm] at (x axis mid) {\bl{1}s}; + \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; + + %plots + \draw[color=blue] plot[mark=*, mark options={fill=white}] + file {s-grammar1.data}; + \only<2->{\draw[color=red] plot[mark=triangle*, mark options={fill=white} ] + file {s-grammar2.data};} + %legend + \begin{scope}[shift={(400,20)}] + \draw[color=blue] (0,0) -- + plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small unambiguous}; + \only<2->{\draw[yshift=\baselineskip, color=red] (0,0) -- + plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) + node[right]{\small ambiguous};} + \end{scope} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}What about Left-Recursion?\end{tabular}} + +\begin{itemize} +\item we record when we recursively called a parser\bigskip +\item whenever the is a recursion, the parser must have consumed something --- so +we can decrease the input string/list of token by one (at the end) +\end{itemize} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}While-Language\end{tabular}} + + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$Stmt$ & $\rightarrow$ & $\text{skip}$\\ + & $|$ & $Id := AExp$\\ + & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ + & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ +$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; 
Stmts$\\ + & $|$ & $Stmt$\medskip\\ +$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ + & $|$ & $Stmt$\medskip\\ +$AExp$ & $\rightarrow$ & \ldots\\ +$BExp$ & $\rightarrow$ & \ldots\\ +\end{tabular}} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}An Interpreter\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{l} +$\{$\\ +\;\;$x := 5 \text{;}$\\ +\;\;$y := x * 3\text{;}$\\ +\;\;$y := x * 4\text{;}$\\ +\;\;$x := u * 3$\\ +$\}$ +\end{tabular}} +\end{center} + +\begin{itemize} +\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause +\item \bl{\text{eval}(stmt, env)} +\end{itemize} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides09.pdf Binary file slides/slides09.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides09.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides09.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,853 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usetikzlibrary{plotmarks} +\usepackage{graphicx} +\usepackage{pgfplots} + + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{while}{ + morekeywords={if,then,else,while,do,true,false,write}, + otherkeywords={=,!=,:=,<,>,;}, + sensitive=true, + morecomment=[n]{/*}{*/}, +} + + +\lstset{language=While, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + 
stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + + +% beamer stuff +\renewcommand{\slidecaption}{AFL 09, King's College London, 28.~November 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + + +% The data files, written on the first run. +\begin{filecontents}{compiled.data} +%1 0.234146 +%5000 0.227539 +%10000 0.280748 +50000 1.087897 +100000 3.713165 +250000 21.6624545 +500000 85.872613 +750000 203.6408015 +1000000 345.736574 +\end{filecontents} + +\begin{filecontents}{interpreted.data} +%1 0.00503 +200 1.005863 +400 7.8296765 +500 15.43106 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{interpreted2.data} +%1 0.00503 +200 1.005863 +400 7.8296765 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{compiled2.data} +200 0.222058 +400 0.215204 +600 0.202031 +800 0.21986 +1000 0.205934 +1200 0.1981615 +1400 0.207116 +\end{filecontents} + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (9)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +Imagine the following situation: You talk to somebody +and you find out that she/he has implemented a compiler.\smallskip + +What is your reaction? 
Check all that apply.\bigskip\pause + + \begin{itemize} + \item[$\Box$] You think she/he is God + \item[$\Box$] \"Uberhacker + \item[$\Box$] superhuman + \item[$\Box$] wizard + \item[$\Box$] supremo + \end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}While-Language\end{tabular}} + + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$Stmt$ & $\rightarrow$ & $\text{skip}$\\ + & $|$ & $Id := AExp$\\ + & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ + & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\\ + & $|$ & $\alert{\text{write}\; Id}$\medskip\\ +$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ + & $|$ & $Stmt$\medskip\\ +$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ + & $|$ & $Stmt$\medskip\\ +$AExp$ & $\rightarrow$ & \ldots\\ +$BExp$ & $\rightarrow$ & \ldots\\ +\end{tabular}} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Fibonacci Numbers\end{tabular}} + +\mbox{}\\[-18mm]\mbox{} + +{\lstset{language=While}\fontsize{10}{12}\selectfont +\texttt{\lstinputlisting{fib.while}}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreter\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$\text{eval}(n, E)$ & $\dn$ & $n$\\ +$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\ +$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\ +$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\ +$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\ +$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\ +$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\ +$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\ +$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\ +\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\; +\text{eval}(cs_1,E)$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\ +\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\; +\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\ +\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\ +$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\ +\end{tabular}} +\end{center} + +\end{frame}} 
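+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Added frame: a minimal Scala sketch of the eval functions from the
+% two previous slides. The case class names for the While AST (Num,
+% Var, Aop, Bop, Skip, Assign, If, While, Write) and the modelling of
+% blocks as List[Stmt] are assumptions made for illustration only;
+% the lecture code (app*.scala) may define the datatypes differently.
+\begin{frame}[fragile,allowframebreaks]
+\frametitle{\begin{tabular}{c}Interpreter in Scala (Sketch)\end{tabular}}
+
+{\lstset{language=Scala}\fontsize{8}{10}\selectfont
+\begin{lstlisting}
+abstract class AExp
+case class Num(n: Int) extends AExp
+case class Var(x: String) extends AExp
+case class Aop(op: String, a1: AExp, a2: AExp) extends AExp
+
+abstract class BExp
+case class Bop(op: String, a1: AExp, a2: AExp) extends BExp
+
+abstract class Stmt
+case object Skip extends Stmt
+case class Assign(x: String, a: AExp) extends Stmt
+case class If(b: BExp, bl1: List[Stmt], bl2: List[Stmt]) extends Stmt
+case class While(b: BExp, bl: List[Stmt]) extends Stmt
+case class Write(x: String) extends Stmt
+
+type Env = Map[String, Int]
+
+def eval_a(a: AExp, env: Env): Int = a match {
+  case Num(n) => n
+  case Var(x) => env(x)                 // lookup x in env
+  case Aop("+", a1, a2) => eval_a(a1, env) + eval_a(a2, env)
+  case Aop("-", a1, a2) => eval_a(a1, env) - eval_a(a2, env)
+  case Aop("*", a1, a2) => eval_a(a1, env) * eval_a(a2, env)
+}
+
+def eval_b(b: BExp, env: Env): Boolean = b match {
+  case Bop("=", a1, a2)  => eval_a(a1, env) == eval_a(a2, env)
+  case Bop("!=", a1, a2) => eval_a(a1, env) != eval_a(a2, env)
+  case Bop("<", a1, a2)  => eval_a(a1, env) < eval_a(a2, env)
+}
+
+def eval_s(s: Stmt, env: Env): Env = s match {
+  case Skip => env
+  case Assign(x, a) => env + (x -> eval_a(a, env))
+  case If(b, bl1, bl2) =>
+    if (eval_b(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+  case While(b, bl) =>
+    if (eval_b(b, env)) eval_s(While(b, bl), eval_bl(bl, env)) else env
+  case Write(x) => { println(env(x)); env }   // print, env unchanged
+}
+
+def eval_bl(bl: List[Stmt], env: Env): Env = bl match {
+  case Nil => env
+  case s :: rest => eval_bl(rest, eval_s(s, env))
+}
+\end{lstlisting}}
+
+\end{frame}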
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Test Program\end{tabular}} + +\mbox{}\\[-18mm]\mbox{} + +{\lstset{language=While}\fontsize{10}{12}\selectfont +\texttt{\lstinputlisting{loops.while}}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] +\addplot+[smooth] file {interpreted.data}; +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}} + +\begin{itemize} +\item introduced in 1995 +\item is a stack-based VM (like Postscript, CLR of .Net) +\item contains a JIT compiler +\item many languages take advantage of JVM's infrastructure (JRE) +\item is garbage collected $\Rightarrow$ no buffer overflows +\item some languages compiled to the JVM: Scala, Clojure\ldots +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} + +{\Large\bl{1 + 2}} + +\begin{center} +\bl{\begin{tabular}{l} +ldc 1\\ +ldc 2\\ +iadd\\ +\end{tabular}} +\end{center}\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} + +{\Large\bl{1 + 2 + 3}} + +\begin{center} +\bl{\begin{tabular}{l} +ldc 1\\ +ldc 2\\ +iadd\\ +ldc 3\\ +iadd\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} + +{\Large\bl{1 + (2 + 3)}} + +\begin{center} +\bl{\begin{tabular}{l} +ldc 1\\ +ldc 2\\ +ldc 3\\ +iadd\\ +iadd\\ +\end{tabular}} +\end{center}\bigskip\pause +\vfill + +\bl{dadd, fadd, ladd, \ldots} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} + +\begin{center} +\bl{\begin{tabular}{@{}lcl@{}} +$\text{compile}(n)$ & $\dn$ & $\text{ldc}\;n$\\ +$\text{compile}(a_1 + a_2)$ & $\dn$\\ +\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\;\text{compile}(a_2)\;@\; \text{iadd}$}\smallskip\\ +$\text{compile}(a_1 - a_2)$ & $\dn$\\ +\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\; \text{compile}(a_2)\;@\; \text{isub}$}\smallskip\\ +$\text{compile}(a_1 * a_2)$ & $\dn$\\ +\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\; \text{compile}(a_2)\;@\; \text{imul}$}\smallskip\\ +\end{tabular}} +\end{center}\pause + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ 
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}}
+
+{\Large\bl{1 + 2 * 3 + (4 - 3)}}
+
+\begin{center}
+\bl{\begin{tabular}{l}
+ldc 1\\
+ldc 2\\
+ldc 3\\
+imul\\
+ldc 4\\
+ldc 3\\
+isub\\
+iadd\\
+iadd\\
+\end{tabular}}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Variables\end{tabular}}
+
+{\Large\bl{$x := 5 + y * 2$}}\bigskip\pause
+
+\begin{itemize}
+\item lookup: \bl{$\text{iload}\; index$}
+\item store: \bl{$\text{istore}\; index$}
+\end{itemize}\bigskip\pause
+
+while compiling we have to maintain a map between our identifiers and the
+Java bytecode indices
+
+\begin{center}
+\bl{$\text{compile}(a, E)$}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}}
+
+\begin{center}
+\bl{\begin{tabular}{@{}lcl@{}}
+$\text{compile}(n, E)$ & $\dn$ & $\text{ldc}\;n$\\
+$\text{compile}(a_1 + a_2, E)$ & $\dn$\\
+\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\;\text{compile}(a_2, E)\;@\; \text{iadd}$}\smallskip\\
+$\text{compile}(a_1 - a_2, E)$ & $\dn$\\
+\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{isub}$}\smallskip\\
+$\text{compile}(a_1 * a_2, E)$ & $\dn$\\
+\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{imul}$}\bigskip\\
+$\text{compile}(x, E)$ & $\dn$ & $\text{iload}\;E(x)$\\
+\end{tabular}}
+\end{center}\pause
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Compiling Statements\end{tabular}}
+
+We return a list of instructions and an environment for the variables
+
+\begin{center}
+\bl{\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+$\text{compile}(\text{skip}, E)$ & $\dn$ & $(N\!il, E)$\bigskip\\
+$\text{compile}(x := a, E)$ & $\dn$\\
+\multicolumn{3}{l}{$(\text{compile}(a, E) \;@\;\text{istore}\;index, E(x\mapsto index))$}\\
+\end{tabular}}
+\end{center}\medskip
+
+where \bl{$index$} is \bl{$E(x)$} if \bl{$x$} is already defined, or otherwise a fresh index that has not been used yet
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}}
+
+{\Large\bl{$x := x + 1$}}
+
+\begin{center}
+\bl{\begin{tabular}{l}
+iload $n_x$\\
+ldc 1\\
+iadd\\
+istore $n_x$\\
+\end{tabular}}
+\end{center}
+
+where \bl{$n_x$} is the index corresponding to the variable \bl{$x$}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Compiling Ifs\end{tabular}}
+
+{\Large\bl{$\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2$}}\bigskip\bigskip
+
+\onslide<2->{Case }\only<2>{{\bf True}:}\only<3>{{\bf False}:}
+
+\begin{center}
+\begin{tikzpicture}[node distance=2mm and 4mm,
+  block/.style={rectangle, minimum size=1cm, draw=black, line width=1mm},
+  point/.style={rectangle, inner sep=0mm, minimum size=0mm, fill=red},
+  skip 
loop/.style={red, line width=1mm, to path={-- ++(0,-10mm) -| (\tikztotarget)}}] +\node (A1) [point] {}; +\node (b) [block, right=of A1] {code of \bl{$b$}}; +\node (A2) [point, right=of b] {}; +\node (cs1) [block, right=of A2] {code of \bl{$cs_1$}}; +\node (A3) [point, right=of cs1] {}; +\node (cs2) [block, right=of A3] {code of \bl{$cs_2$}}; +\node (A4) [point, right=of cs2] {}; + +\only<2>{ +\draw (A1) edge [->, red, line width=1mm] (b); +\draw (b) edge [->, red, line width=1mm] (cs1); +\draw (cs1) edge [->, red, line width=1mm] (A3); +\draw (A3) edge [->,skip loop] (A4); +\node [below=of cs2] {\raisebox{-5mm}{\small{}jump}};} +\only<3>{ +\draw (A1) edge [->, red, line width=1mm] (b); +\draw (b) edge [->, red, line width=1mm] (A2); +\draw (A2) edge [skip loop] (A3); +\draw (A3) edge [->, red, line width=1mm] (cs2); +\draw (cs2) edge [->,red, line width=1mm] (A4); +\node [below=of cs1] {\raisebox{-5mm}{\small{}conditional jump}};} +\end{tikzpicture} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Conditional Jumps\end{tabular}} + +\begin{minipage}{1.1\textwidth} +\begin{itemize} +\item \bl{if\_icmpeq $label$} if two ints are equal, then jump\medskip +\item \bl{if\_icmpne $label$} if two ints aren't equal, then jump\medskip +\item \bl{if\_icmpge $label$} if one int is greater or equal then another, then jump +\item[]\ldots +\end{itemize} +\end{minipage}\pause + + +\begin{center} +\bl{\begin{tabular}{l} +$L_1$:\\ +\hspace{5mm}if\_icmpeq\;$L_2$\\ +\hspace{5mm}iload 1\\ +\hspace{5mm}ldc 1\\ +\hspace{5mm}iadd\\ +\hspace{5mm}if\_icmpeq\;$L_1$\\ +$L_2$: +\end{tabular}} +\end{center} + +\begin{textblock}{3.5}(11,12) +\only<3>{labels must be unique} +\end{textblock} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling BExps\end{tabular}} + +{\Large\bl{$a_1 = a_2$}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$\text{compile}(a_1 = a_2, E, lab)$ & $\dn$\\ +\multicolumn{3}{l}{$\quad\text{compile}(a_1, E) \;@\;\text{compile}(a_2, E)\;@\; \text{if\_icmpne}\;lab$} +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling Ifs\end{tabular}} + +{\Large\bl{if $b$ then $cs_1$ else $cs_2$}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$\text{compile}(\text{if}\;b\;\text{then}\; cs_1\;\text{else}\; cs_2, E)$ & $\dn$\\ +\multicolumn{3}{l}{$\quad l_{ifelse}\;$ \textcolor{black}{(fresh label)}}\\ +\multicolumn{3}{l}{$\quad l_{ifend}\;$ \textcolor{black}{(fresh label)}}\\ +\multicolumn{3}{l}{$\quad (is_1, E') = \text{compile}(cs_1, E)$}\\ +\multicolumn{3}{l}{$\quad (is_2, E'') = \text{compile}(cs_2, E')$}\\ +\multicolumn{3}{l}{$\quad(\text{compile}(b, E, l_{ifelse})$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;is_1$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\; \text{goto}\;l_{ifend}$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{ifelse}:$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;is_2$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{ifend}:, E'')$}\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + 
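+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Added frame: a small Scala sketch of the compilation scheme on the
+% previous slides (fresh labels, compiling BExps and Ifs), emitting
+% Jasmin-style assembler strings. The AST case classes and the helper
+% names (fresh, compile_a, compile_b, compile_s, compile_bl) are
+% assumptions for illustration, not the code used in the lectures;
+% only a minimal fragment of the While language is covered.
+\begin{frame}[fragile,allowframebreaks]
+\frametitle{\begin{tabular}{c}Compiler in Scala (Sketch)\end{tabular}}
+
+{\lstset{language=Scala}\fontsize{8}{10}\selectfont
+\begin{lstlisting}
+abstract class AExp
+case class Num(n: Int) extends AExp
+case class Var(x: String) extends AExp
+
+abstract class BExp
+case class Eq(a1: AExp, a2: AExp) extends BExp
+
+abstract class Stmt
+case object Skip extends Stmt
+case class Assign(x: String, a: AExp) extends Stmt
+case class If(b: BExp, bl1: List[Stmt], bl2: List[Stmt]) extends Stmt
+
+type Env = Map[String, Int]       // variable -> local variable index
+type Instrs = List[String]        // assembler instructions
+
+// generate fresh, unique labels
+var counter = -1
+def fresh(prefix: String): String = { counter += 1; prefix + "_" + counter }
+
+def compile_a(a: AExp, env: Env): Instrs = a match {
+  case Num(n) => List("ldc " + n)
+  case Var(x) => List("iload " + env(x))
+}
+
+// a failed comparison jumps to the given label
+def compile_b(b: BExp, env: Env, lab: String): Instrs = b match {
+  case Eq(a1, a2) =>
+    compile_a(a1, env) ++ compile_a(a2, env) ++ List("if_icmpne " + lab)
+}
+
+def compile_s(s: Stmt, env: Env): (Instrs, Env) = s match {
+  case Skip => (Nil, env)
+  case Assign(x, a) => {
+    val index = env.getOrElse(x, env.size)   // reuse or allocate an index
+    (compile_a(a, env) ++ List("istore " + index), env + (x -> index))
+  }
+  case If(b, bl1, bl2) => {
+    val if_else = fresh("If_else")
+    val if_end  = fresh("If_end")
+    val (is1, env1) = compile_bl(bl1, env)
+    val (is2, env2) = compile_bl(bl2, env1)
+    (compile_b(b, env, if_else) ++ is1 ++
+       List("goto " + if_end, if_else + ":") ++ is2 ++
+       List(if_end + ":"), env2)
+  }
+}
+
+def compile_bl(bl: List[Stmt], env: Env): (Instrs, Env) = bl match {
+  case Nil => (Nil, env)
+  case s :: rest => {
+    val (is1, env1) = compile_s(s, env)
+    val (is2, env2) = compile_bl(rest, env1)
+    (is1 ++ is2, env2)
+  }
+}
+\end{lstlisting}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+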
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling Whiles\end{tabular}} + +{\Large\bl{$\text{while}\;b\;\text{do}\;cs$}}\bigskip\bigskip + +\onslide<2->{Case }\only<2>{{\bf True}:}\only<3>{{\bf False}:} + +\begin{center} +\begin{tikzpicture}[node distance=2mm and 4mm, + block/.style={rectangle, minimum size=1cm, draw=black, line width=1mm}, + point/.style={rectangle, inner sep=0mm, minimum size=0mm, fill=red}, + skip loop/.style={red, line width=1mm, to path={-- ++(0,-10mm) -| (\tikztotarget)}}] +\node (A0) [point, left=of A1] {}; +\node (A1) [point] {}; +\node (b) [block, right=of A1] {code of \bl{$b$}}; +\node (A2) [point, right=of b] {}; +\node (cs1) [block, right=of A2] {code of \bl{$cs$}}; +\node (A3) [point, right=of cs1] {}; +\node (A4) [point, right=of A3] {}; + +\only<2>{ +\draw (A0) edge [->, red, line width=1mm] (b); +\draw (b) edge [->, red, line width=1mm] (cs1); +\draw (cs1) edge [->, red, line width=1mm] (A3); +\draw (A3) edge [->,skip loop] (A1);} +\only<3>{ +\draw (A0) edge [->, red, line width=1mm] (b); +\draw (b) edge [->, red, line width=1mm] (A2); +\draw (A2) edge [skip loop] (A3); +\draw (A3) edge [->, red, line width=1mm] (A4);} +\end{tikzpicture} +\end{center} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling Whiles\end{tabular}} + +{\Large\bl{while $b$ do $cs$}} + +\begin{center} +\bl{\begin{tabular}{lcl} +$\text{compile}(\text{while}\; b\; \text{do} \;cs, E)$ & $\dn$\\ +\multicolumn{3}{l}{$\quad l_{wbegin}\;$ \textcolor{black}{(fresh label)}}\\ +\multicolumn{3}{l}{$\quad l_{wend}\;$ \textcolor{black}{(fresh label)}}\\ +\multicolumn{3}{l}{$\quad (is, E') = \text{compile}(cs_1, E)$}\\ +\multicolumn{3}{l}{$\quad(l_{wbegin}:$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;\text{compile}(b, E, l_{wend})$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;is$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\; \text{goto}\;l_{wbegin}$}\\ +\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{wend}:, E')$}\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiling Writes\end{tabular}} + +{\Large\bl{write $x$}} + +\begin{center} +\small\bl{\begin{tabular}{l} +.method public static write(I)V\hspace{1cm}\textcolor{black}{(library function)}\\ +\;\; .limit locals 5 \\ +\;\; .limit stack 5 \\ +\;\; iload 0 \\ +\;\; getstatic java/lang/System/out Ljava/io/PrintStream;\\ +\;\; swap \\ +\;\; invokevirtual java/io/PrintStream/println(I)V \\ +\;\; return \\ +.end method\bigskip\bigskip\\ +% +\normalsize +iload $E(x)$\\ +invokestatic write(I)V\\ +\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\small\bl{\begin{tabular}{l} +.class public XXX.XXX\\ +.super java/lang/Object\\ +\\ +.method public ()V\\ +\;\; aload\_0\\ +\;\; invokenonvirtual java/lang/Object/()V\\ + \;\; return\\ +.end method\\ +\\ +.method public static main([Ljava/lang/String;)V\\ +\;\; .limit locals 200\\ +\;\; .limit stack 200\\ +\\ + \textcolor{black}{(here comes the compiled code)}\\ +\\ +\;\; return\\ +.end method\\ 
+\end{tabular}} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Next Compiler Phases\end{tabular}} + +\begin{itemize} +\item assembly $\Rightarrow$ byte code (class file) +\item labels $\Rightarrow$ absolute or relative jumps\bigskip\bigskip +\item \texttt{javap} is a disassembler for class files +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiled Code\end{tabular}} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] +\addplot+[smooth] file {compiled.data}; +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiled vs.~Interpreted Code\end{tabular}} + +\begin{center} +\begin{tikzpicture} +\begin{loglogaxis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] +\addplot+[smooth] file {interpreted.data}; +\addplot+[smooth] file {compiled.data}; +\end{loglogaxis} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Compiled vs.~Interpreted Code\end{tabular}} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, + xlabel=n, + enlargelimits=0.05, + ybar interval=0.7, legend style=small] +\addplot file {interpreted2.data}; +\addplot file {compiled2.data}; +%\legend{interpreted, compiled} +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}What Next\end{tabular}} + +\begin{itemize} +\item register spilling +\item dead code removal +\item loop optimisations +\item instruction selection +\item type checking +\item concurrency +\item fuzzy testing +\item verification\bigskip\\ + +\item GCC, LLVM, tracing JITs +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides/slides10.pdf Binary file slides/slides10.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides/slides10.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides/slides10.tex Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,383 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} 
+\usetikzlibrary{plotmarks} +\usepackage{graphicx} +\usepackage{pgfplots} + + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{while}{ + morekeywords={if,then,else,while,do,true,false,write}, + otherkeywords={=,!=,:=,<,>,;}, + sensitive=true, + morecomment=[n]{/*}{*/}, +} + + +\lstset{language=While, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + + +% beamer stuff +\renewcommand{\slidecaption}{AFL 10, King's College London, 5.~December 2012} +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + + +% The data files, written on the first run. 
+\begin{filecontents}{compiled.data} +%1 0.234146 +%5000 0.227539 +%10000 0.280748 +50000 1.087897 +100000 3.713165 +250000 21.6624545 +500000 85.872613 +750000 203.6408015 +1000000 345.736574 +\end{filecontents} + +\begin{filecontents}{interpreted.data} +%1 0.00503 +200 1.005863 +400 7.8296765 +500 15.43106 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{interpreted2.data} +%1 0.00503 +200 1.005863 +400 7.8296765 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{compiled2.data} +200 0.222058 +400 0.215204 +600 0.202031 +800 0.21986 +1000 0.205934 +1200 0.1981615 +1400 0.207116 +\end{filecontents} + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (10)\\[3mm] + \end{tabular}} + + \normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS (also home work is there)\\ + \end{tabular} + \end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\Large\bf +There are more problems, than there are programs.\bigskip\bigskip\pause\\ + +There must be a problem for which there is no program. +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Revision: Proofs} + +\begin{center} +\includegraphics[scale=0.4]{river-stones.jpg} +\end{center} + +\end{frame}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Subsets} + +\Large +\bl{$A \subseteq B$}\bigskip\bigskip\\ + +\bl{$\forall e.\; e \in A \Rightarrow e \in B$} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Subsets} + +\Large +\bl{$A \subseteq B$} and \bl{$B \subseteq A$}\bigskip + +then \bl{$A = B$} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Injective Function} + +\Large +\bl{f} is an injective function iff \bigskip + +\bl{$\forall x y.\; f(x) = f(y) \Rightarrow x = y$} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Cardinality} + +\Large +\bl{$|A|$} $\dn$ ``how many elements''\bigskip\\ + +\bl{$A \subseteq B \Rightarrow |A| \leq |B|$}\bigskip\\\pause + +if there is an injective function \bl{$f: A \rightarrow B$} then \bl{$|A| \leq |B|$}\ + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Natural Numbers} + +\Large +\bl{$\mathbb{N}$} \bl{$\dn$} \bl{$\{0, 1, 2, 3, .......\}$}\bigskip\pause + +\bl{$A$} is \alert{countable} iff \bl{$|A| \leq |\mathbb{N}|$} + +\end{frame}} 
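+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Added frame: a small worked example (an illustration added here,
+% not part of the original deck) connecting the subset rule for
+% cardinality with the rule about injective functions.
+\begin{frame}[c]
+\frametitle{Example: Subsets and Injections}
+
+\Large
+Assume \bl{$A \subseteq B$} and take \bl{$f : A \rightarrow B$} with \bl{$f(x) \dn x$}.\bigskip
+
+\bl{$f$} is injective: \bl{$f(x) = f(y)$} is just \bl{$x = y$}.\bigskip
+
+So the rule about injective functions gives \bl{$|A| \leq |B|$},
+which agrees with \bl{$A \subseteq B \Rightarrow |A| \leq |B|$}.
+\end{frame}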
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{First Question} + +\Large +\bl{$|\mathbb{N} - \{0\}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip + +\normalsize +\bl{$\geq$} or \bl{$\leq$} or \bl{$=$} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\Large +\bl{$|\mathbb{N} - \{0, 1\}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\pause + +\bl{$|\mathbb{N} - \mathbb{O}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip + +\normalsize +\bl{$\mathbb{O}$} $\dn$ odd numbers\quad \bl{$\{1,3,5......\}$}\\ \pause +\bl{$\mathbb{E}$} $\dn$ even numbers\quad \bl{$\{0,2,4......\}$}\\ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\Large +\bl{$|\mathbb{N} \cup \mathbb{-N}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip + + +\normalsize +\bl{$\mathbb{\phantom{-}N}$} $\dn$ positive numbers\quad \bl{$\{0,1,2,3,......\}$}\\ +\bl{$\mathbb{-N}$} $\dn$ negative numbers\quad \bl{$\{0,-1,-2,-3,......\}$}\\ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\Large +\bl{$A$} is \alert{countable} if there exists an injective \bl{$f : A \rightarrow \mathbb{N}$}\bigskip + +\bl{$A$} is \alert{uncountable} if there does not exist an injective \bl{$f : A \rightarrow \mathbb{N}$}\bigskip\bigskip + + +countable: \bl{$|A| \leq |\mathbb{N}|$}\\ +uncountable: \bl{$|A| > |\mathbb{N}|$}\pause\bigskip + + +Does there exist such an \bl{$A$} ? + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Halting Problem} + +\large +Assume a program \bl{$H$} that decides for all programs \bl{$A$} and all +input data \bl{$D$} whether\bigskip + +\begin{itemize} +\item \bl{$H(A, D) \dn 1$} iff \bl{$A(D)$} terminates +\item \bl{$H(A, D) \dn 0$} otherwise +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Halting Problem (2)} + +\large +Given such a program \bl{$H$} define the following program \bl{$C$}: +for all programs \bl{$A$}\bigskip + +\begin{itemize} +\item \bl{$C(A) \dn 0$} iff \bl{$H(A, A) = 0$} +\item \bl{$C(A) \dn$ loops} otherwise +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{Contradiction} + + +\bl{$H(C, C)$} is either \bl{$0$} or \bl{$1$}. + +\begin{itemize} +\item \bl{$H(C, C) = 1$} $\stackrel{\text{def}\,H}{\Rightarrow}$ \bl{$C(C)\downarrow$} $\stackrel{\text{def}\,C}{\Rightarrow}$ \bl{$H(C, C)=0$} +\item \bl{$H(C, C) = 0$} $\stackrel{\text{def}\,H}{\Rightarrow}$ \bl{$C(C)$} loops $\stackrel{\text{def}\,C}{\Rightarrow}$\\ +\hspace{7cm}\bl{$H(C, C)=1$} +\end{itemize} + +Contradiction in both cases. So \bl{$H$} cannot exist. 
+ +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: + diff -r e85600529ca5 -r 4794759139ea slides01.pdf Binary file slides01.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides01.tex --- a/slides01.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,483 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 01, King's College London, 26.~September 2012} - - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (1)\\[-3mm] - \end{tabular}} - - \begin{center} - \includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} - \includegraphics[scale=0.31]{pics/ante2.jpg}\\ - \footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} - \end{center} - -\normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{textblock}{1}(2,5) -\begin{tabular}{c} -\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm] -\small Server -\end{tabular} -\end{textblock} - -\begin{textblock}{1}(5.6,4) - 
\begin{tikzpicture}[scale=1.1] - \draw[white] (0,1) node (X) {}; - \draw[white] (2,1) node (Y) {}; - \draw[white] (0,0) node (X1) {}; - \draw[white] (2,0) node (Y1) {}; - \draw[white] (0,-1) node (X2) {}; - \draw[white] (2,-1) node (Y2) {}; - \draw[red, <-, line width = 2mm] (X) -- (Y); - \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {}; - \draw[red, ->, line width = 2mm] (X1) -- (Y1); - \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {}; - \draw[red, <-, line width = 2mm] (X2) -- (Y2); - \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {}; - \end{tikzpicture} -\end{textblock} - - -\begin{textblock}{1}(9,5.5) -\begin{tabular}{c} -\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm] -\small Browser -\end{tabular} -\end{textblock} - -\only<2>{ -\begin{textblock}{10}(2,13.5) -\begin{itemize} -\item programming languages, compilers -\end{itemize} -\end{textblock}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -transforming strings into structured data\\[10mm] - -{\LARGE\bf Lexing}\medskip\\ -\hspace{5mm}(recognising ``words'')\\[6mm] - -{\LARGE\bf Parsing}\medskip\\ -\hspace{5mm}(recognising ``sentences'') - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -The subject is quite old: - -\begin{itemize} -\item Turing Machines, 1936 -\item first compiler for COBOL, 1957 (Grace Hopper) -\item but surprisingly research papers are still published now -\end{itemize} - -\begin{flushright} -\includegraphics[scale=0.3]{pics/hopper.jpg}\\ -\footnotesize\textcolor{gray}{Grace Hopper} -\end{flushright} - -{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}This Course\end{tabular}} - -\begin{itemize} -\item the ultimate goal is to implement a small web-browser (really small one)\bigskip -\end{itemize} - -Let's start with: - -\begin{itemize} -\item a web-crawler -\item an email harvester -\item a web-scraper -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}A Web-Crawler\end{tabular}} - -\mbox{}\\[10mm] - -\begin{enumerate} -\item given an URL, read the corresponding webpage -\item extract all links from it -\item call the web-crawler again for all these links -\end{enumerate} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}A Web-Crawler\end{tabular}} - -\mbox{}\\[10mm] - - -\begin{enumerate} -\item given an URL, read the corresponding webpage -\item if not possible print, out a problem -\item if possible, extract all links from it -\item call the web-crawler again for all these links -\end{enumerate}\bigskip\pause - -\small (we need a bound for the number of recursive calls) - -\small (the purpose is to check all links 
on my own webpage) -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Scala\end{tabular}} - -\footnotesize a simple Scala function for reading webpages\\[-3mm] - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app0.scala}}}\pause -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}}}\pause\bigskip - - -\footnotesize slightly more complicated for handling errors properly:\\[-3mm] - -\footnotesize -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app1.scala}}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}A Regular Expression\end{tabular}} - -\begin{itemize} -\item \ldots{} is a pattern or template for specifying strings -\end{itemize}\bigskip - -\begin{center} -\only<1>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf -\texttt{"https?://[$\hat{\hspace{2mm}}$"]*"}}}% -\only<2>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf -\texttt{"""\textbackslash{}"https?://[$\hat{\hspace{2mm}}$\textbackslash{}"]*\textbackslash{}"""".r}}} -\end{center}\bigskip\bigskip - -matches for example\\ -\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf -\texttt{"http://www.foobar.com"}}\\ -\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf -\texttt{"https://www.tls.org"}}\\ - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf -\texttt{rexp.findAllIn(string)}}\medskip - -returns a list of all (sub)strings that match the regular expression\bigskip\bigskip - -{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf -\texttt{rexp.findFirstIn(string)}}\medskip - -returns either {\bf\texttt{None}} if no (sub)string matches -or {\bf\texttt{Some(s)}} with the first (sub)string - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app2.scala}}}\medskip - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{crawl(some\_start\_URL, 2)}}\ - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\footnotesize -a version that only ``crawls'' links in my domain: - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app3.scala}}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\footnotesize -a little email ``harvester'': - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app4.scala}}}\bigskip - -\tiny -\textcolor{gray}{\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\newcommand{\bl}[1]{\textcolor{blue}{#1}} 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -Their inductive definition:\medskip - -\begin{textblock}{6}(2,5) - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular} - \end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -\small -In Scala: - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app51.scala}}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] Regular Expression\end{tabular}} - -\begin{textblock}{15}(1,4) - \begin{tabular}{@ {}rcl} - \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ - \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ - \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ - \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ - \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ - $L$(r$_2$) $\}$}\\ - \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\ - \end{tabular}\bigskip - -\onslide<2->{ -\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ -\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ -\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ - $L$(r)$^n$ $\}$}} -} - \end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} - -\large -a regular expression \bl{r} matches a string \bl{s} is defined as - -\begin{center} -\bl{s $\in$ $L$(r)}\\ -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}This Course\end{tabular}} - -We will have a look at: - -\begin{itemize} -\item regular expressions / regular expression matching -\item automata -\item the Myhill-Nerode theorem -\item parsing -\item grammars -\item a small interpreter / web browser -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Exam\end{tabular}} - -\begin{itemize} -\item The question ``Is this relevant for the exam?'' is not appreciated!\bigskip\\ - -Whatever is in the homework sheets (and is not marked ``optional'') is relevant for the -exam.\\ No code needs to be written in the exam. 
-\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides02.pdf Binary file slides02.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides02.tex --- a/slides02.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,494 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 02, King's College London, 3.~October 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (2)\\[3mm] - \end{tabular}} - - %\begin{center} - %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} - %\includegraphics[scale=0.31]{pics/ante2.jpg}\\ - %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} - %\end{center} - -\normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Languages\end{tabular}} - -A \alert{language} is a 
set of strings.\bigskip - -A \alert{regular expression} specifies a set of strings or language. - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -Their inductive definition: - - -\begin{textblock}{6}(2,5) - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular} - \end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -Their implementation in Scala: - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app51.scala}}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] Regular Expression\end{tabular}} - -\begin{textblock}{15}(1,4) - \begin{tabular}{@ {}rcl} - \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ - \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ - \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ - \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ - \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) @ $L$(r$_2$)}\\ - \bl{$L$(r$^*$)} & \bl{$\dn$} & \bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}\\ - \end{tabular}\bigskip - -\hspace{5mm}\textcolor{gray}{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ -\textcolor{gray}{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$} -\end{textblock} - -\only<2->{ -\begin{textblock}{5}(11,5) -\textcolor{gray}{\small -A @ B\\ -\ldots you take out every string from A and -concatenate it with every string in B -} -\end{textblock}} - -\only<3->{ -\begin{textblock}{6}(9,12)\small -\bl{$L$} is a function from regular expressions to sets of strings\\ -\bl{$L$ : Rexp $\Rightarrow$ Set[String]} -\end{textblock}} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\large -\begin{center} -What is \bl{$L$(a$^*$)}? 
-\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -\newcommand{\YES}{\textcolor{gray}{yes}} -\newcommand{\NO}{\textcolor{gray}{no}} -\newcommand{\FORALLR}{\textcolor{gray}{$\forall$ r.}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Reg Exp Equivalences\end{tabular}} - -\begin{center} -\begin{tabular}{l@ {\hspace{7mm}}rcl@ {\hspace{7mm}}l} -&\bl{(a + b) + c} & \bl{$\equiv^?$} & \bl{a + (b + c)} & \onslide<2->{\YES}\\ -&\bl{a + a} & \bl{$\equiv^?$} & \bl{a} & \onslide<3->{\YES}\\ -&\bl{(a $\cdot$ b) $\cdot$ c} & \bl{$\equiv^?$} & \bl{a $\cdot$ (b $\cdot$ c)} & \onslide<4->{\YES}\\ -&\bl{a $\cdot$ a} & \bl{$\equiv^?$} & \bl{a} & \onslide<5->{\NO}\\ -&\bl{$\epsilon^*$} & \bl{$\equiv^?$} & \bl{$\epsilon$} & \onslide<6->{\YES}\\ -&\bl{$\varnothing^*$} & \bl{$\equiv^?$} & \bl{$\varnothing$} & \onslide<7->{\NO}\\ -\FORALLR &\bl{r $\cdot$ $\epsilon$} & \bl{$\equiv^?$} & \bl{r} & \onslide<8->{\YES}\\ -\FORALLR &\bl{r + $\epsilon$} & \bl{$\equiv^?$} & \bl{r} & \onslide<9->{\NO}\\ -\FORALLR &\bl{r + $\varnothing$} & \bl{$\equiv^?$} & \bl{r} & \onslide<10->{\YES}\\ -\FORALLR &\bl{r $\cdot$ $\varnothing$} & \bl{$\equiv^?$} & \bl{r} & \onslide<11->{\NO}\\ -&\bl{c $\cdot$ (a + b)} & \bl{$\equiv^?$} & \bl{(c $\cdot$ a) + (c $\cdot$ b)} & \onslide<12->{\YES}\\ -&\bl{a$^*$} & \bl{$\equiv^?$} & \bl{$\epsilon$ + (a $\cdot$ a$^*$)} & \onslide<13->{\YES} -\end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} - -\large -a regular expression \bl{r} matches a string \bl{s} is defined as - -\begin{center} -\bl{s $\in$ $L$(r)}\\ -\end{center}\bigskip\bigskip\pause - -\small -if \bl{r$_1$ $\equiv$ r$_2$}, then \bl{$s$ $\in$ $L$(r$_1$)} iff \bl{$s$ $\in$ $L$(r$_2$)} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}A Matching Algorithm\end{tabular}} - -\begin{itemize} -\item given a regular expression \bl{r} and a string \bl{s}, say yes or no for whether -\begin{center} -\bl{s $\in$ $L$(r)} -\end{center} -or not.\bigskip\bigskip\pause -\end{itemize}\pause - -\small -\begin{itemize} -\item Identifiers (strings of letters or digits, starting with a letter) -\item Integers (a non-empty sequence of digits) -\item Keywords (else, if, while, \ldots) -\item White space (a non-empty sequence of blanks, newlines and tabs) -\end{itemize} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}A Matching Algorithm\end{tabular}} - -\small -whether a regular expression matches the empty string:\medskip - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app5.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} - -\large -If \bl{r} matches the string \bl{c::s}, what is a regular expression that matches 
\bl{s}?\bigskip\bigskip\bigskip\bigskip - -\small -\bl{der c r} gives the answer -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp (2)\end{tabular}} - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} - \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\ - \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ - \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ - & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ - & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ - \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)} &\smallskip\\\pause - - \bl{ders [] r} & \bl{$\dn$} & \bl{r} & \\ - \bl{ders (c::s) r} & \bl{$\dn$} & \bl{ders s (der c r)} & \\ - \end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app6.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Rexp Matcher\end{tabular}} - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app7.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}} - -Remember their inductive definition:\\[5cm] - -\begin{textblock}{6}(5,5) - \begin{tabular}{@ {}rrl} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$}\\ - & \bl{$\mid$} & \bl{$\epsilon$} \\ - & \bl{$\mid$} & \bl{c} \\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$}\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} \\ - & \bl{$\mid$} & \bl{r$^*$} \\ - \end{tabular} - \end{textblock} - -If we want to prove something, say a property \bl{$P$(r)}, for all regular expressions \bl{r} then \ldots - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Rexp (2)\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip -\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip -\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}. -\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already -holds for \bl{r}. 
-\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Rexp (3)\end{tabular}} - -Assume \bl{$P(r)$} is the property: - -\begin{center} -\bl{nullable(r)} if and only if \bl{"" $\in$ $L$(r)} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Strings\end{tabular}} - -If we want to prove something, say a property \bl{$P$(s)}, for all strings \bl{s} then \ldots\bigskip - -\begin{itemize} -\item \bl{$P$} holds for the empty string, and\medskip -\item \bl{$P$} holds for the string \bl{c::s} under the assumption that \bl{$P$} -already holds for \bl{s} -\end{itemize} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Strings (2)\end{tabular}} - -Let \bl{Der c A} be the set defined as - -\begin{center} -\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ } -\end{center} - -Assume that \bl{$L$(der c r) = Der c ($L$(r))}. Prove that - -\begin{center} -\bl{matcher(r, s) if and only if s $\in$ $L$(r)} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Regular Languages\end{tabular}} - -A language (set of strings) is \alert{regular} iff there exists -a regular expression that recognises all its strings. 
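
The listings app5.scala, app6.scala and app7.scala are only pulled in via \lstinputlisting and are not part of this changeset, so the following Scala sketch reconstructs what such a matcher could look like, directly from the nullable, der and ders equations on the slides above; the datatype and function names are assumptions for illustration, not the actual course files.

// Sketch of a derivative-based matcher, following the slide definitions.
// Names (Rexp, nullable, der, ders, matches) are illustrative only.
abstract class Rexp
case object NULL extends Rexp                      // the null regexp
case object EMPTY extends Rexp                     // the empty-string regexp
case class CHAR(c: Char) extends Rexp              // a single character
case class ALT(r1: Rexp, r2: Rexp) extends Rexp    // r1 + r2
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp    // r1 . r2
case class STAR(r: Rexp) extends Rexp              // r*

// nullable(r) is true iff r can match the empty string
def nullable(r: Rexp): Boolean = r match {
  case NULL        => false
  case EMPTY       => true
  case CHAR(_)     => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_)     => true
}

// der c r: the regular expression after c has been recognised
def der(c: Char, r: Rexp): Rexp = r match {
  case NULL        => NULL
  case EMPTY       => NULL
  case CHAR(d)     => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) =>
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r1)    => SEQ(der(c, r1), STAR(r1))
}

// ders extends der from single characters to strings
def ders(s: List[Char], r: Rexp): Rexp = s match {
  case Nil     => r
  case c :: cs => ders(cs, der(c, r))
}

// the matcher: s is in L(r) iff the derivative of r along s is nullable
def matches(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r))

With these definitions, matches(r, s) holds exactly when the iterated derivative of r along s is nullable, which is the property the correctness statement on the slides (matcher(r, s) if and only if s in L(r)) is about.
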
- -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Automata\end{tabular}} - -A deterministic finite automaton consists of: - -\begin{itemize} -\item a set of states -\item one of these states is the start state -\item some states are accepting states, and -\item there is transition function\medskip - -\small -which takes a state as argument and a character and produces a new state\smallskip\\ -this function might not always be defined -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides03.pdf Binary file slides03.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides03.tex --- a/slides03.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,386 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 03, King's College London, 10.~October 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (3)\\[3mm] - \end{tabular}} - - %\begin{center} - %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} - 
%\includegraphics[scale=0.31]{pics/ante2.jpg}\\ - %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} - %\end{center} - -\normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - & \alert{\bf (I have put a temporary link in there.)}\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Last Week\end{tabular}} - -Last week I showed you - -\begin{itemize} -\item one simple-minded regular expression matcher (which however does not work in all cases), and\bigskip -\item one which works provably in all cases - -\begin{center} -\bl{matcher r s} \;\;if and only if \;\; \bl{s $\in$ $L$(r)} -\end{center} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} - \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\ - \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ - \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ - & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ - & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ - \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)}\\ - \end{tabular} -\end{center} - -``the regular expression after \bl{c} has been recognised'' - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -For this we defined the set \bl{Der c A} as - -\begin{center} -\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ } -\end{center} - -which is called the semantic derivative of a set -and proved - -\begin{center} -\bl{$L$(der c r) $=$ Der c ($L$(r))} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Idea of the Algorithm\end{tabular}} - -If we want to recognise the string \bl{abc} with regular expression \bl{r} -then\medskip - -\begin{enumerate} -\item \bl{Der a ($L$(r))}\pause -\item \bl{Der b (Der a ($L$(r)))} -\item \bl{Der c (Der b (Der a ($L$(r))))}\pause -\item finally we test whether the empty string is in set\pause\medskip -\end{enumerate} - -The matching algorithm works similarly, just over regular expression than sets. 
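
To see the four steps of the slide above concretely, here is a small trace using the der and nullable sketch given earlier; the particular regular expression a . b . c is only an illustrative choice.

// Worked example (assumes the Rexp/der/nullable sketch from above):
// match the string "abc" against the regular expression a . (b . c)
val r  = SEQ(CHAR('a'), SEQ(CHAR('b'), CHAR('c')))

val r1 = der('a', r)        // corresponds to Der a (L(r))
val r2 = der('b', r1)       // corresponds to Der b (Der a (L(r)))
val r3 = der('c', r2)       // corresponds to Der c (Der b (Der a (L(r))))

// finally test whether the resulting regular expression can match ""
val result = nullable(r3)   // true, hence "abc" is in L(r)
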
-\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Input: string \bl{abc} and regular expression \bl{r} - -\begin{enumerate} -\item \bl{der a r} -\item \bl{der b (der a r)} -\item \bl{der c (der b (der a r))}\pause -\item finally check whether the latter regular expression can match the empty string -\end{enumerate} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -We need to prove - -\begin{center} -\bl{$L$(der c r) $=$ Der c ($L$(r))} -\end{center} - -by induction on the regular expression. - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip -\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip -\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}. -\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already -holds for \bl{r}. -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Natural Numbers\\ and Strings\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$0$} and -\item \bl{$P$} holds for \bl{$n + 1$} under the assumption that \bl{$P$} already -holds for \bl{$n$} -\end{itemize}\bigskip - -\begin{itemize} -\item \bl{$P$} holds for \bl{\texttt{""}} and -\item \bl{$P$} holds for \bl{$c\!::\!s$} under the assumption that \bl{$P$} already -holds for \bl{$s$} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -\begin{center} - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular}\bigskip\pause - \end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Languages\end{tabular}} - -A \alert{language} is a set of strings.\bigskip - -A \alert{regular expression} specifies a set of strings or language.\bigskip - -A language is \alert{regular} iff there exists -a regular expression that recognises all its strings.\bigskip\bigskip\pause - -\textcolor{gray}{not all languages are regular, e.g.~\bl{a$^n$b$^n$}.} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] 
-\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -\begin{center} - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular}\bigskip - \end{center} - -How about ranges \bl{[a-z]}, \bl{r$^\text{+}$} and \bl{!r}? - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Negation of Regular Expr's\end{tabular}} - -\begin{itemize} -\item \bl{!r} \hspace{6mm} (everything that \bl{r} cannot recognise)\medskip -\item \bl{$L$(!r) $\dn$ UNIV - $L$(r)}\medskip -\item \bl{nullable (!r) $\dn$ not (nullable(r))}\medskip -\item \bl{der\,c\,(!r) $\dn$ !(der\,c\,r)} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Regular Exp's for Lexing\end{tabular}} - -Lexing separates strings into ``words'' / components. - -\begin{itemize} -\item Identifiers (non-empty strings of letters or digits, starting with a letter) -\item Numbers (non-empty sequences of digits omitting leading zeros) -\item Keywords (else, if, while, \ldots) -\item White space (a non-empty sequence of blanks, newlines and tabs) -\item Comments -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Automata\end{tabular}} - -A deterministic finite automaton consists of: - -\begin{itemize} -\item a set of states -\item one of these states is the start state -\item some states are accepting states, and -\item there is transition function\medskip - -\small -which takes a state as argument and a character and produces a new state\smallskip\\ -this function might not always be defined -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides04.pdf Binary file slides04.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides04.tex --- a/slides04.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,612 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - 
basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 04, King's College London, 17.~October 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (4)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Last Week\end{tabular}} - -Last week I showed you\bigskip - -\begin{itemize} -\item a tokenizer taking a list of regular expressions\bigskip - -\item tokenization identifies lexeme in an input stream of characters (or string) -and cathegorizes them into tokens - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Two Rules\end{tabular}} - -\begin{itemize} -\item Longest match rule (maximal munch rule): The -longest initial substring matched by any regular expression is taken -as next token.\bigskip - -\item Rule priority: -For a particular longest initial substring, the first regular -expression that can match determines the token. 
- -\end{itemize} - -%\url{http://www.technologyreview.com/tr10/?year=2011} - -%finite deterministic automata/ nondeterministic automaton - -%\item problem with infix operations, for example i-12 - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\mode{ -\begin{frame}[t] - -\begin{center} -\texttt{"if true then then 42 else +"} -\end{center} - - -\begin{tabular}{@{}l} -KEYWORD: \\ -\hspace{5mm}\texttt{"if"}, \texttt{"then"}, \texttt{"else"},\\ -WHITESPACE:\\ -\hspace{5mm}\texttt{" "}, \texttt{"$\backslash$n"},\\ -IDENT:\\ -\hspace{5mm}LETTER $\cdot$ (LETTER + DIGIT + \texttt{"\_"})$^*$\\ -NUM:\\ -\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + \texttt{"0"}\\ -OP:\\ -\hspace{5mm}\texttt{"+"}\\ -COMMENT:\\ -\hspace{5mm}\texttt{"$\slash$*"} $\cdot$ (ALL$^*$ $\cdot$ \texttt{"*$\slash$"} $\cdot$ ALL$^*$) $\cdot$ \texttt{"*$\slash$"} -\end{tabular} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] - -\begin{center} -\texttt{"if true then then 42 else +"} -\end{center} - -\only<1>{ -\small\begin{tabular}{l} -KEYWORD(if),\\ -WHITESPACE,\\ -IDENT(true),\\ -WHITESPACE,\\ -KEYWORD(then),\\ -WHITESPACE,\\ -KEYWORD(then),\\ -WHITESPACE,\\ -NUM(42),\\ -WHITESPACE,\\ -KEYWORD(else),\\ -WHITESPACE,\\ -OP(+) -\end{tabular}} - -\only<2>{ -\small\begin{tabular}{l} -KEYWORD(if),\\ -IDENT(true),\\ -KEYWORD(then),\\ -KEYWORD(then),\\ -NUM(42),\\ -KEYWORD(else),\\ -OP(+) -\end{tabular}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - - -There is one small problem with the tokenizer. How should we -tokenize: - -\begin{center} -\texttt{"x - 3"} -\end{center} - -\begin{tabular}{@{}l} -OP:\\ -\hspace{5mm}\texttt{"+"}, \texttt{"-"}\\ -NUM:\\ -\hspace{5mm}(NONZERODIGIT $\cdot$ DIGIT$^*$) + \texttt{"0"}\\ -NUMBER:\\ -\hspace{5mm}NUM + (\texttt{"-"} $\cdot$ NUM)\\ -\end{tabular} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Negation\end{tabular}} - -Assume you have an alphabet consisting of the letters \bl{a}, \bl{b} and \bl{c} only. -Find a regular expression that matches all strings \emph{except} \bl{ab}, \bl{ac} and \bl{cba}. 
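
The negation operator from the earlier slides gives one way to attack this exercise with the derivative matcher. The sketch below extends the earlier Rexp datatype with a NOT constructor; the names and the helper str are illustrative only, and for simplicity negation is only handled at the outermost position, which is all this exercise needs.

// Negation for the derivative matcher (sketch, illustrative names).
// Only an outermost !r is handled here; a full version would add the
// NOT cases directly to nullable and der.
case class NOT(r: Rexp) extends Rexp

def nullableN(r: Rexp): Boolean = r match {
  case NOT(r1) => !nullableN(r1)       // nullable (!r) = not (nullable r)
  case _       => nullable(r)
}

def derN(c: Char, r: Rexp): Rexp = r match {
  case NOT(r1) => NOT(derN(c, r1))     // der c (!r) = !(der c r)
  case _       => der(c, r)
}

def matchesN(r: Rexp, s: String): Boolean =
  nullableN(s.toList.foldLeft(r)((r1, c) => derN(c, r1)))

// helper to write "ab" etc. as a regular expression
def str(s: String): Rexp =
  s.toList.map(CHAR).reduceRight[Rexp](SEQ)

// all strings over {a, b, c} except ab, ac and cba
val except = NOT(ALT(ALT(str("ab"), str("ac")), str("cba")))

// e.g. in the Scala REPL:
matchesN(except, "ab")    // false
matchesN(except, "cba")   // false
matchesN(except, "abc")   // true
matchesN(except, "")      // true

Here !(ab + ac + cba) matches every string except the three listed ones, provided the UNIV in L(!r) = UNIV - L(r) is read as the set of all strings over the alphabet {a, b, c}.
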
- -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Deterministic Finite Automata\end{tabular}} - -A deterministic finite automaton consists of: - -\begin{itemize} -\item a finite set of states -\item one of these states is the start state -\item some states are accepting states, and -\item there is transition function\medskip - -\small -which takes a state and a character as arguments and produces a new state\smallskip\\ -this function might not always be defined everywhere -\end{itemize} - -\begin{center} -\bl{$A(Q, q_0, F, \delta)$} -\end{center} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\includegraphics[scale=0.7]{pics/ch3.jpg} -\end{center}\pause - -\begin{itemize} -\item start can be an accepting state -\item it is possible that there is no accepting state -\item all states might be accepting (but does not necessarily mean all strings are accepted) -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\includegraphics[scale=0.7]{pics/ch3.jpg} -\end{center} - -for this automaton \bl{$\delta$} is the function\\ - -\begin{center} -\begin{tabular}{lll} -\bl{(q$_0$, a) $\rightarrow$ q$_1$} & \bl{(q$_1$, a) $\rightarrow$ q$_4$} & \bl{(q$_4$, a) $\rightarrow$ q$_4$}\\ -\bl{(q$_0$, b) $\rightarrow$ q$_2$} & \bl{(q$_1$, b) $\rightarrow$ q$_2$} & \bl{(q$_4$, b) $\rightarrow$ q$_4$}\\ -\end{tabular}\ldots -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Accepting a String\end{tabular}} - -Given - -\begin{center} -\bl{$A(Q, q_0, F, \delta)$} -\end{center} - -you can define - -\begin{center} -\begin{tabular}{l} -\bl{$\hat{\delta}(q, \texttt{""}) = q$}\\ -\bl{$\hat{\delta}(q, c::s) = \hat{\delta}(\delta(q, c), s)$}\\ -\end{tabular} -\end{center}\pause - -Whether a string \bl{$s$} is accepted by \bl{$A$}? 
- -\begin{center} -\hspace{5mm}\bl{$\hat{\delta}(q_0, s) \in F$} -\end{center} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Non-Deterministic\\[-1mm] Finite Automata\end{tabular}} - -A non-deterministic finite automaton consists again of: - -\begin{itemize} -\item a finite set of states -\item one of these states is the start state -\item some states are accepting states, and -\item there is transition \alert{relation}\medskip -\end{itemize} - - -\begin{center} -\begin{tabular}{c} -\bl{(q$_1$, a) $\rightarrow$ q$_2$}\\ -\bl{(q$_1$, a) $\rightarrow$ q$_3$}\\ -\end{tabular} -\hspace{10mm} -\begin{tabular}{c} -\bl{(q$_1$, $\epsilon$) $\rightarrow$ q$_2$}\\ -\end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\includegraphics[scale=0.7]{pics/ch5.jpg} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tabular}[b]{ll} -\bl{$\varnothing$} & \includegraphics[scale=0.7]{pics/NULL.jpg}\\\\ -\bl{$\epsilon$} & \includegraphics[scale=0.7]{pics/epsilon.jpg}\\\\ -\bl{c} & \includegraphics[scale=0.7]{pics/char.jpg}\\ -\end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tabular}[t]{ll} -\bl{r$_1$ $\cdot$ r$_2$} & \includegraphics[scale=0.6]{pics/seq.jpg}\\\\ -\end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tabular}[t]{ll} -\bl{r$_1$ + r$_2$} & \includegraphics[scale=0.7]{pics/alt.jpg}\\\\ -\end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tabular}[b]{ll} -\bl{r$^*$} & \includegraphics[scale=0.7]{pics/star.jpg}\\ -\end{tabular} -\end{center}\pause\bigskip - -Why can't we just have an epsilon transition from the accepting states to the starting state? 
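
The delta-hat equations from the "Accepting a String" slide translate almost directly into Scala. In the sketch below the representation choices are assumptions (states as Ints, the possibly-undefined transition function as a Map), and the example automaton is the one for strings ending in aa that reappears later in these slides.

// A DFA A(Q, q0, F, delta) -- sketch with assumed representations:
// states as Ints, delta as a partial function encoded by a Map.
case class DFA(q0: Int, fins: Set[Int], delta: Map[(Int, Char), Int]) {

  // delta-hat:  dhat(q, "")   = q
  //             dhat(q, c::s) = dhat(delta(q, c), s)
  // returns None if delta is undefined somewhere along the way
  def deltaHat(q: Int, s: List[Char]): Option[Int] = s match {
    case Nil     => Some(q)
    case c :: cs => delta.get((q, c)).flatMap(q2 => deltaHat(q2, cs))
  }

  // a string s is accepted iff dhat(q0, s) is an accepting state
  def accepts(s: String): Boolean =
    deltaHat(q0, s.toList).exists(fins.contains)
}

// example: a DFA accepting exactly the strings that end in aa
val dfa = DFA(
  q0   = 0,
  fins = Set(2),
  delta = Map(
    (0, 'a') -> 1, (0, 'b') -> 0,
    (1, 'a') -> 2, (1, 'b') -> 0,
    (2, 'a') -> 2, (2, 'b') -> 0))

dfa.accepts("baa")   // true
dfa.accepts("aab")   // false

Using Option for delta-hat reflects the remark that the transition function might not be defined everywhere: an undefined transition simply means the string is not accepted.
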
- -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Subset Construction\end{tabular}} - - -\begin{textblock}{5}(1,2.5) -\includegraphics[scale=0.5]{pics/ch5.jpg} -\end{textblock} - -\begin{textblock}{11}(6.5,4.5) -\begin{tabular}{r|cl} -& a & b\\ -\hline -$\varnothing$ \onslide<2>{\textcolor{white}{*}} & $\varnothing$ & $\varnothing$\\ -$\{0\}$ \onslide<2>{\textcolor{white}{*}} & $\{0,1,2\}$ & $\{2\}$\\ -$\{1\}$ \onslide<2>{\textcolor{white}{*}} &$\{1\}$ & $\varnothing$\\ -$\{2\}$ \onslide<2>{*} & $\varnothing$ &$\{2\}$\\ -$\{0,1\}$ \onslide<2>{\textcolor{white}{*}} &$\{0,1,2\}$ &$\{2\}$\\ -$\{0,2\}$ \onslide<2>{*}&$\{0,1,2\}$ &$\{2\}$\\ -$\{1,2\}$ \onslide<2>{*}& $\{1\}$ & $\{2\}$\\ -\onslide<2>{s:} $\{0,1,2\}$ \onslide<2>{*}&$\{0,1,2\}$ &$\{2\}$\\ -\end{tabular} -\end{textblock} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Regular Languages\end{tabular}} - -A language is \alert{regular} iff there exists -a regular expression that recognises all its strings.\bigskip\medskip - -or equivalently\bigskip\medskip - -A language is \alert{regular} iff there exists -a deterministic finite automaton that recognises all its strings.\bigskip\pause - -Why is every finite set of strings a regular language? -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\includegraphics[scale=0.5]{pics/ch3.jpg} -\end{center} - -\begin{center} -\includegraphics[scale=0.5]{pics/ch4.jpg}\\ -minimal automaton -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{enumerate} -\item Take all pairs \bl{(q, p)} with \bl{q $\not=$ p} -\item Mark all pairs that accepting and non-accepting states -\item For all unmarked pairs \bl{(q, p)} and all characters \bl{c} tests wether -\begin{center} -\bl{($\delta$(q,c), $\delta$(p,c))} -\end{center} -are marked. If yes, then also mark \bl{(q, p)} -\item Repeat last step until no chance. -\item All unmarked pairs can be merged. 
-\end{enumerate} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Given the function - -\begin{center} -\bl{\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} -$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ -$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ -$rev(c)$ & $\dn$ & $c$\\ -$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ -$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ -$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ -\end{tabular}} -\end{center} - - -and the set - -\begin{center} -\bl{$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$} -\end{center} - -prove whether - -\begin{center} -\bl{$L(rev(r)) = Rev (L(r))$} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{itemize} -\item The star-case in our proof about the matcher needs the following lemma -\begin{center} -\bl{Der\,c\,A$^*$ $=$ (Der c A)\,@\, A$^*$} -\end{center} -\end{itemize}\bigskip\bigskip - -\begin{itemize} -\item If \bl{\texttt{""} $\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B $\cup$ (Der\,c\,B)}\medskip -\item If \bl{\texttt{""} $\not\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B} - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{itemize} -\item Assuming you have the alphabet \bl{\{a, b, c\}}\bigskip -\item Give a regular expression that can recognise all strings that have at least one \bl{b}. -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -``I hate coding. 
I do not want to look at code.'' - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides05.pdf Binary file slides05.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides05.tex --- a/slides05.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,504 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 05, King's College London, 24.~October 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (5)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Deterministic Finite Automata\end{tabular}} - -A DFA \bl{$A(Q, q_0, F, \delta)$} consists of: - -\begin{itemize} -\item a finite set of states \bl{$Q$} -\item one of these states is the start state \bl{$q_0$} -\item some 
states are accepting states \bl{$F$} -\item a transition function \bl{$\delta$} -\end{itemize}\pause - -\onslide<2->{ -\begin{center} -\begin{tabular}{l} -\bl{$\hat{\delta}(q, \texttt{""}) = q$}\\ -\bl{$\hat{\delta}(q, c\!::\!s) = \hat{\delta}(\delta(q, c), s)$} -\end{tabular} -\end{center}} - -\only<3,4>{ -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \node[state, initial] (q02) at ( 0,1) {$q_{0}$}; - \node[state] (q13) at ( 1,1) {$q_{1}$}; - \node[state, accepting] (q4) at ( 2,1) {$q_2$}; - \path[->] (q02) edge[bend left] node[above] {$a$} (q13) - (q13) edge[bend left] node[below] {$b$} (q02) - (q13) edge node[above] {$a$} (q4) - (q02) edge [loop below] node {$b$} () - (q4) edge [loop right] node {$a, b$} () - ; -\end{tikzpicture} -\end{center}}% -% -\only<5>{ -\begin{center} -\bl{$L(A) \dn \{ s \;|\; \hat{\delta}(q_0, s) \in F\}$} -\end{center}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Non-Deterministic\\[-1mm] Finite Automata\end{tabular}} - -An NFA \bl{$A(Q, q_0, F, \delta)$} consists again of: - -\begin{itemize} -\item a finite set of states -\item one of these states is the start state -\item some states are accepting states -\item a transition \alert{relation}\medskip -\end{itemize} - - -\begin{center} -\begin{tabular}{c} -\bl{(q$_1$, a) $\rightarrow$ q$_2$}\\ -\bl{(q$_1$, a) $\rightarrow$ q$_3$}\\ -\end{tabular} -\hspace{10mm} -\begin{tabular}{c} -\bl{(q$_1$, $\epsilon$) $\rightarrow$ q$_2$}\\ -\end{tabular} -\end{center}\pause\medskip - -A string \bl{s} is accepted by an NFA, if there is a ``lucky'' sequence to an accepting state. - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Last Week\end{tabular}} - -Last week I showed you\bigskip - -\begin{itemize} -\item an algorithm for automata minimisation - -\item an algorithm for transforming a regular expression into an NFA - -\item an algorithm for transforming an NFA into a DFA (subset construction) - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}This Week\end{tabular}} - -Go over the algorithms again, but with two new things and \ldots\medskip - -\begin{itemize} -\item with the example: what is the regular expression that accepts every string, except those ending -in \bl{aa}?\medskip - -\item Go over the proof for \bl{$L(rev(r)) = Rev(L(r))$}.\medskip - -\item Anything else so far. -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs By Induction\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip -\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip -\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}. -\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already -holds for \bl{r}. 
-\end{itemize} - -\begin{center} -\bl{$P(r):\;\;L(rev(r)) = Rev(L(r))$} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] - -What is the regular expression that accepts every string, except those ending -in \bl{aa}?\pause\bigskip - -\begin{center} -\begin{tabular}{l} -\bl{(a + b)$^*$ba}\\ -\bl{(a + b)$^*$ab}\\ -\bl{(a + b)$^*$bb}\\\pause -\bl{a}\\ -\bl{\texttt{""}} -\end{tabular} -\end{center}\pause - -What are the strings to be avoided?\pause\medskip - -\begin{center} -\bl{(a + b)$^*$aa} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] - -An NFA for \bl{(a + b)$^*$aa} - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \node[state, initial] (q0) at ( 0,1) {$q_0$}; - \node[state] (q1) at ( 1,1) {$q_1$}; - \node[state, accepting] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge node[above] {$a$} (q1) - (q1) edge node[above] {$a$} (q2) - (q0) edge [loop below] node {$a$} () - (q0) edge [loop above] node {$b$} () - ; -\end{tikzpicture} -\end{center}\pause - -Minimisation for DFAs\\ -Subset Construction for NFAs - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}DFA Minimisation\end{tabular}} - - -\begin{enumerate} -\item Take all pairs \bl{(q, p)} with \bl{q $\not=$ p} -\item Mark all pairs that accepting and non-accepting states -\item For all unmarked pairs \bl{(q, p)} and all characters \bl{c} tests wether -\begin{center} -\bl{($\delta$(q,c), $\delta$(p,c))} -\end{center} -are marked. If yes, then also mark \bl{(q, p)}. -\item Repeat last step until nothing changed. -\item All unmarked pairs can be merged. 
-\end{enumerate} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Minimal DFA \only<1>{\bl{(a + b)$^*$aa}}\only<2->{\alert{not} \bl{(a + b)$^*$aa}} - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \only<1>{\node[state, initial] (q0) at ( 0,1) {$q_0$};} - \only<2->{\node[state, initial,accepting] (q0) at ( 0,1) {$q_0$};} - \only<1>{\node[state] (q1) at ( 1,1) {$q_1$};} - \only<2->{\node[state,accepting] (q1) at ( 1,1) {$q_1$};} - \only<1>{\node[state, accepting] (q2) at ( 2,1) {$q_2$};} - \only<2->{\node[state] (q2) at ( 2,1) {$q_2$};} - \path[->] (q0) edge[bend left] node[above] {$a$} (q1) - (q1) edge[bend left] node[above] {$b$} (q0) - (q2) edge[bend left=50] node[below] {$b$} (q0) - (q1) edge node[above] {$a$} (q2) - (q2) edge [loop right] node {$a$} () - (q0) edge [loop below] node {$b$} () - ; -\end{tikzpicture} -\end{center} - -\onslide<3>{How to get from a DFA to a regular expression?} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \only<1->{\node[state, initial] (q0) at ( 0,1) {$q_0$};} - \only<1->{\node[state] (q1) at ( 1,1) {$q_1$};} - \only<1->{\node[state] (q2) at ( 2,1) {$q_2$};} - \path[->] (q0) edge[bend left] node[above] {$a$} (q1) - (q1) edge[bend left] node[above] {$b$} (q0) - (q2) edge[bend left=50] node[below] {$b$} (q0) - (q1) edge node[above] {$a$} (q2) - (q2) edge [loop right] node {$a$} () - (q0) edge [loop below] node {$b$} () - ; -\end{tikzpicture} -\end{center}\pause\bigskip - -\onslide<2->{ -\begin{center} -\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} -\bl{$q_0$} & \bl{$=$} & \bl{$2\, q_0 + 3 \,q_1 + 4\, q_2$}\\ -\bl{$q_1$} & \bl{$=$} & \bl{$2 \,q_0 + 3\, q_1 + 1\, q_2$}\\ -\bl{$q_2$} & \bl{$=$} & \bl{$1\, q_0 + 5\, q_1 + 2\, q_2$}\\ - -\end{tabular} -\end{center} -} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \only<1->{\node[state, initial] (q0) at ( 0,1) {$q_0$};} - \only<1->{\node[state] (q1) at ( 1,1) {$q_1$};} - \only<1->{\node[state] (q2) at ( 2,1) {$q_2$};} - \path[->] (q0) edge[bend left] node[above] {$a$} (q1) - (q1) edge[bend left] node[above] {$b$} (q0) - (q2) edge[bend left=50] node[below] {$b$} (q0) - (q1) edge node[above] {$a$} (q2) - (q2) edge [loop right] node {$a$} () - (q0) edge [loop below] node {$b$} () - ; -\end{tikzpicture} -\end{center}\bigskip - -\onslide<2->{ -\begin{center} -\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} -\bl{$q_0$} & \bl{$=$} & \bl{$\epsilon + q_0\,b + q_1\,b + q_2\,b$}\\ -\bl{$q_1$} & \bl{$=$} & \bl{$q_0\,a$}\\ -\bl{$q_2$} & \bl{$=$} & \bl{$q_1\,a + q_2\,a$}\\ - -\end{tabular} -\end{center} -} - -\onslide<3->{ -Arden's Lemma: -\begin{center} -If \bl{$q = q\,r + s$}\; then\; \bl{$q = s\, r^*$} -\end{center} -} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Algorithms on Automata\end{tabular}} - - -\begin{itemize} -\item Reg $\rightarrow$ NFA: Thompson-McNaughton-Yamada method\medskip -\item NFA 
$\rightarrow$ DFA: Subset Construction\medskip -\item DFA $\rightarrow$ Reg: Brzozowski's Algebraic Method\medskip -\item DFA minimisation: Hopcrofts Algorithm\medskip -\item complement DFA -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\newcommand{\qq}{\mbox{\texttt{"}}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Grammars\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F + (F \cdot \qq*\qq \cdot F) + (F \cdot \qq\backslash\qq \cdot F)$\\ -$F$ & $\rightarrow$ & $T + (T \cdot \qq\texttt{+}\qq \cdot T) + (T \cdot \qq\texttt{-}\qq \cdot T)$\\ -$T$ & $\rightarrow$ & $num + (\qq\texttt{(}\qq \cdot E \cdot \qq\texttt{)}\qq)$\\ -\end{tabular}} -\end{center} - -\bl{$E$}, \bl{$F$} and \bl{$T$} are non-terminals\\ -\bl{$E$} is start symbol\\ -\bl{$num$}, \bl{(}, \bl{)}, \bl{+} \ldots are terminals\bigskip\\ - - -\bl{\texttt{(2*3)+(3+4)}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F + (F \cdot \qq*\qq \cdot F) + (F \cdot \qq\backslash\qq \cdot F)$\\ -$F$ & $\rightarrow$ & $T + (T \cdot \qq\texttt{+}\qq \cdot T) + (T \cdot \qq\texttt{-}\qq \cdot T)$\\ -$T$ & $\rightarrow$ & $num + (\qq\texttt{(}\qq \cdot E \cdot \qq\texttt{)}\qq)$\\ -\end{tabular}} -\end{center} - -\begin{center} -\begin{tikzpicture}[level distance=8mm, blue] - \node {E} - child {node {F} - child {node {T} - child {node {\qq(\qq\,E\,\qq)\qq} - child {node{F \qq*\qq{} F} - child {node {T} child {node {2}}} - child {node {T} child {node {3}}} - } - } - } - child {node {\qq+\qq}} - child {node {T} - child {node {\qq(\qq\,E\,\qq)\qq} - child {node {F} - child {node {T \qq+\qq{} T} - child {node {3}} - child {node {4}} - } - }} - }}; -\end{tikzpicture} -\end{center} - -\begin{textblock}{5}(1, 5) -\bl{\texttt{(2*3)+(3+4)}} -\end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides06.pdf Binary file slides06.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides06.tex --- a/slides06.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,579 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usetikzlibrary{plotmarks} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - 
numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 06, King's College London, 31.~October 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - - -% The data files, written on the first run. -\begin{filecontents}{re-python.data} -1 0.029 -5 0.029 -10 0.029 -15 0.032 -16 0.042 -17 0.042 -18 0.055 -19 0.084 -20 0.136 -21 0.248 -22 0.464 -23 0.899 -24 1.773 -25 3.505 -26 6.993 -27 14.503 -28 29.307 -#29 58.886 -\end{filecontents} - -\begin{filecontents}{re1.data} -1 0.00179 -2 0.00011 -3 0.00014 -4 0.00026 -5 0.00050 -6 0.00095 -7 0.00190 -8 0.00287 -9 0.00779 -10 0.01399 -11 0.01894 -12 0.03666 -13 0.07994 -14 0.08944 -15 0.02377 -16 0.07392 -17 0.22798 -18 0.65310 -19 2.11360 -20 6.31606 -21 21.46013 -\end{filecontents} - -\begin{filecontents}{re2.data} -1 0.00240 -2 0.00013 -3 0.00020 -4 0.00030 -5 0.00049 -6 0.00083 -7 0.00146 -8 0.00228 -9 0.00351 -10 0.00640 -11 0.01217 -12 0.02565 -13 0.01382 -14 0.02423 -15 0.05065 -16 0.06522 -17 0.02140 -18 0.05176 -19 0.18254 -20 0.51898 -21 1.39631 -22 2.69501 -23 8.07952 -\end{filecontents} - -\begin{filecontents}{re-internal.data} -1 0.00069 -301 0.00700 -601 0.00297 -901 0.00470 -1201 0.01301 -1501 0.01175 -1801 0.01761 -2101 0.01787 -2401 0.02717 -2701 0.03932 -3001 0.03470 -3301 0.04349 -3601 0.05411 -3901 0.06181 -4201 0.07119 -4501 0.08578 -\end{filecontents} - -\begin{filecontents}{re3.data} -1 0.001605 -501 0.131066 -1001 0.057885 -1501 0.136875 -2001 0.176238 -2501 0.254363 -3001 0.37262 -3501 0.500946 -4001 0.638384 -4501 0.816605 -5001 1.00491 -5501 1.232505 -6001 1.525672 -6501 1.757502 -7001 2.092784 -7501 2.429224 -8001 2.803037 -8501 3.463045 -9001 3.609 -9501 4.081504 -10001 4.54569 -\end{filecontents} -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (6)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -``I hate coding. 
I do not want to look at code.'' - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -``I am appalled. You do not show code anymore.'' - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}ReDoS\end{tabular}} - -\begin{itemize} -\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice\bigskip -\item ``Regular Expressions Will Stab You in the Back''\bigskip -\item Evil regular expressions\medskip -\begin{itemize} -\item \bl{$(a?\{n\})a\{n\}$} -\item \bl{$(a^+)^+$} -\item \bl{$([a-zA-Z]^+)^*$} -\item \bl{$(a + aa)^+$} -\item \bl{$(a + a?)^+$} -\end{itemize} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Regexp Matching\end{tabular}} - -Given a regular expression - -\begin{enumerate} -\item you might convert it into a DFA (subset construction) -\item you might try all possible paths in an NFA via backtracking -\item you might try all paths in an NFA in parallel -\item you might try to convert the DFA ``lazily'' -\end{enumerate}\bigskip - -Often No~2 is implemented (sometimes there are even good reasons for doing this). - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}\bl{$(a?\{n\})a\{n\}$} in Python\end{tabular}} - -\begin{tikzpicture}[y=.2cm, x=.3cm] - %axis - \draw (0,0) -- coordinate (x axis mid) (30,0); - \draw (0,0) -- coordinate (y axis mid) (0,30); - %ticks - \foreach \x in {0,5,...,30} - \draw (\x,1pt) -- (\x,-3pt) - node[anchor=north] {\x}; - \foreach \y in {0,5,...,30} - \draw (1pt,\y) -- (-3pt,\y) - node[anchor=east] {\y}; - %labels - \node[below=0.6cm] at (x axis mid) {\bl{a}s}; - \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; - - %plots - \draw[color=blue] plot[mark=*, mark options={fill=white}] - file {re-python.data}; - \only<2->{ - \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] - file {re1.data};} - \only<3->{ - \draw[color=green] plot[mark=square*, mark options={fill=white} ] - file {re2.data};} - - %legend - \begin{scope}[shift={(4,20)}] - \draw[color=blue] (0,0) -- - plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Python}; - \only<2->{\draw[yshift=\baselineskip, color=red] (0,0) -- - plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V1};} - \only<3->{ - \draw[yshift=2\baselineskip, color=green] (0,0) -- - plot[mark=square*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V2 with simplifications};} - \end{scope} -\end{tikzpicture} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - - -\begin{tikzpicture}[y=.7cm, x=.0009cm] - %axis - \draw (0,0) -- coordinate (x axis mid) (10000,0); - \draw (0,0) -- coordinate (y axis mid) (0,6); - %ticks - \foreach \x in {0,2000,...,10000} - \draw (\x,1pt) -- (\x,-3pt) - node[anchor=north] {\x}; - \foreach \y in {0,1,...,6} - \draw 
(1pt,\y) -- (-3pt,\y) - node[anchor=east] {\y}; - %labels - \node[below=0.6cm] at (x axis mid) {\bl{a}s}; - \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; - - %plots - \draw[color=blue] plot[mark=*, mark options={fill=white}] - file {re-internal.data}; - \only<2->{ - \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] - file {re3.data};} - - %legend - \begin{scope}[shift={(2000,4)}] - \draw[color=blue] (0,0) -- - plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala Internal}; - \only<2->{ - \draw[yshift=\baselineskip, color=red] (0,0) -- - plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V3 with explicit $\_\{\_\}$};} - \end{scope} -\end{tikzpicture} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -\newcommand{\qq}{\mbox{\texttt{"}}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Grammars\end{tabular}} - -A (context-free) grammar \bl{$G$} consists of - -\begin{itemize} -\item a finite set of nonterminal symbols (upper case) -\item a finite terminal symbols or tokens (lower case) -\item a start symbol (which must be a nonterminal) -\item a set of rules -\begin{center} -\bl{$A \rightarrow \text{rhs}$} -\end{center} - -where \bl{rhs} are sequences involving terminals and nonterminals.\medskip\pause - -We can also allow rules -\begin{center} -\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} -\end{center} -\end{itemize} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Palindromes\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$S$ & $\rightarrow$ & $\epsilon$ \\ -$S$ & $\rightarrow$ & $a\cdot S\cdot a$ \\ -$S$ & $\rightarrow$ & $b\cdot S\cdot b$ \\ -\end{tabular}} -\end{center}\pause - -or - -\begin{center} -\bl{\begin{tabular}{lcl} -$S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ -\end{tabular}} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Arithmetic Expressions\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $num\_token$ \\ -$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ -$E$ & $\rightarrow$ & $( \cdot E \cdot )$ -\end{tabular}} -\end{center}\pause - -\bl{\texttt{1 + 2 * 3 + 4}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Parse Trees\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\ -$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\ -$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\ -\end{tabular}} -\end{center} - -\begin{center} -\begin{tikzpicture}[level distance=8mm, blue] - \node {$E$} - child {node {$F$} - child {node {$T$} - child {node {(\,$E$\,)} - child {node{$F$ *{} $F$} - child {node {$T$} child {node {2}}} - child {node {$T$} child {node {3}}} - } - } - } - child {node {+}} - child {node 
{$T$} - child {node {(\,$E$\,)} - child {node {$F$} - child {node {$T$ +{} $T$} - child {node {3}} - child {node {4}} - } - }} - }}; -\end{tikzpicture} -\end{center} - -\begin{textblock}{5}(1, 6.5) -\bl{\texttt{(2*3)+(3+4)}} -\end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}} - -A grammar is \alert{ambiguous} if there is a string that has at least parse trees. - - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $num\_token$ \\ -$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ -$E$ & $\rightarrow$ & $( \cdot E \cdot )$ -\end{tabular}} -\end{center} - -\bl{\texttt{1 + 2 * 3 + 4}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Chomsky Normal Form\end{tabular}} - -All rules must be of the form - -\begin{center} -\bl{$A \rightarrow a$} -\end{center} - -or - -\begin{center} -\bl{$A \rightarrow B\cdot C$} -\end{center} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} - - -\begin{center} -\bl{\begin{tabular}{@ {}lcl} -$S$ & $\rightarrow$ & $N\cdot P$ \\ -$P$ & $\rightarrow$ & $V\cdot N$ \\ -$N$ & $\rightarrow$ & $N\cdot N$ \\ -$N$ & $\rightarrow$ & $\texttt{students} \;|\; \texttt{Jeff} \;|\; \texttt{geometry} \;|\; \texttt{trains} $ \\ -$V$ & $\rightarrow$ & $\texttt{trains}$ -\end{tabular}} -\end{center} - -\bl{\texttt{Jeff trains geometry students}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} - - -\begin{itemize} -\item runtime is \bl{$O(n^3)$}\bigskip -\item grammars need to be transferred into CNF -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides07.pdf Binary file slides07.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides07.tex --- a/slides07.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,542 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usetikzlibrary{plotmarks} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - 
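% For the CYK example above, the sentence ``Jeff trains geometry students'' is recognised
% bottom-up, one span length at a time; each cell holds a set of nonterminals, which is
% how the algorithm copes with ambiguity. Only the cells needed for the final parse are
% shown here:
%
%   length 1:  Jeff:{N}   trains:{N,V}   geometry:{N}   students:{N}
%   length 2:  "geometry students" gets N              (N -> N N)
%   length 3:  "trains geometry students" gets P and N (P -> V N, N -> N N)
%   length 4:  the whole sentence gets S               (S -> N P, splitting after "Jeff")
%
% Since the start symbol S appears in the cell for the whole sentence, the sentence is
% in the language.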
basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 07, King's College London, 14.~November 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - - -% The data files, written on the first run. -\begin{filecontents}{re-python.data} -1 0.029 -5 0.029 -10 0.029 -15 0.032 -16 0.042 -17 0.042 -18 0.055 -19 0.084 -20 0.136 -21 0.248 -22 0.464 -23 0.899 -24 1.773 -25 3.505 -26 6.993 -27 14.503 -28 29.307 -#29 58.886 -\end{filecontents} - -\begin{filecontents}{re-ruby.data} -1 0.00006 -2 0.00003 -3 0.00001 -4 0.00001 -5 0.00001 -6 0.00002 -7 0.00002 -8 0.00004 -9 0.00007 -10 0.00013 -11 0.00026 -12 0.00055 -13 0.00106 -14 0.00196 -15 0.00378 -16 0.00764 -17 0.01606 -18 0.03094 -19 0.06508 -20 0.12420 -21 0.25393 -22 0.51449 -23 1.02174 -24 2.05998 -25 4.22514 -26 8.42479 -27 16.88678 -28 34.79653 -\end{filecontents} - -\begin{filecontents}{re1.data} -1 0.00179 -2 0.00011 -3 0.00014 -4 0.00026 -5 0.00050 -6 0.00095 -7 0.00190 -8 0.00287 -9 0.00779 -10 0.01399 -11 0.01894 -12 0.03666 -13 0.07994 -14 0.08944 -15 0.02377 -16 0.07392 -17 0.22798 -18 0.65310 -19 2.11360 -20 6.31606 -21 21.46013 -\end{filecontents} - -\begin{filecontents}{re2.data} -1 0.00240 -2 0.00013 -3 0.00020 -4 0.00030 -5 0.00049 -6 0.00083 -7 0.00146 -8 0.00228 -9 0.00351 -10 0.00640 -11 0.01217 -12 0.02565 -13 0.01382 -14 0.02423 -15 0.05065 -16 0.06522 -17 0.02140 -18 0.05176 -19 0.18254 -20 0.51898 -21 1.39631 -22 2.69501 -23 8.07952 -\end{filecontents} - -\begin{filecontents}{re-internal.data} -1 0.00069 -301 0.00700 -601 0.00297 -901 0.00470 -1201 0.01301 -1501 0.01175 -1801 0.01761 -2101 0.01787 -2401 0.02717 -2701 0.03932 -3001 0.03470 -3301 0.04349 -3601 0.05411 -3901 0.06181 -4201 0.07119 -4501 0.08578 -\end{filecontents} - -\begin{filecontents}{re3.data} -1 0.001605 -501 0.131066 -1001 0.057885 -1501 0.136875 -2001 0.176238 -2501 0.254363 -3001 0.37262 -3501 0.500946 -4001 0.638384 -4501 0.816605 -5001 1.00491 -5501 1.232505 -6001 1.525672 -6501 1.757502 -7001 2.092784 -7501 2.429224 -8001 2.803037 -8501 3.463045 -9001 3.609 -9501 4.081504 -10001 4.54569 -\end{filecontents} -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (7)\\[3mm] - 
\end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}\bl{$(a?\{n\})a\{n\}$}\end{tabular}} - -\mbox{}\\[-13mm] - -\begin{tikzpicture}[y=.2cm, x=.3cm] - %axis - \draw (0,0) -- coordinate (x axis mid) (30,0); - \draw (0,0) -- coordinate (y axis mid) (0,30); - %ticks - \foreach \x in {0,5,...,30} - \draw (\x,1pt) -- (\x,-3pt) - node[anchor=north] {\x}; - \foreach \y in {0,5,...,30} - \draw (1pt,\y) -- (-3pt,\y) - node[anchor=east] {\y}; - %labels - \node[below=0.6cm] at (x axis mid) {\bl{a}s}; - \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; - - %plots - \draw[color=blue] plot[mark=*, mark options={fill=white}] - file {re-python.data}; - \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] - file {re1.data}; - \draw[color=green] plot[mark=square*, mark options={fill=white} ] - file {re2.data}; - \draw[color=brown] plot[mark=pentagon*, mark options={fill=white} ] - file {re-ruby.data}; - - %legend - \begin{scope}[shift={(4,20)}] - \draw[color=blue] (0,0) -- - plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Python}; - \draw[yshift=-\baselineskip, color=brown] (0,0) -- - plot[mark=pentagon*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Ruby (Daniel Baldwin)}; - \draw[yshift=\baselineskip, color=red] (0,0) -- - plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V1}; - \draw[yshift=2\baselineskip, color=green] (0,0) -- - plot[mark=square*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V2 with simplifications}; - \end{scope} -\end{tikzpicture} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] - -\begin{tikzpicture}[y=.7cm, x=.0009cm] - %axis - \draw (0,0) -- coordinate (x axis mid) (10000,0); - \draw (0,0) -- coordinate (y axis mid) (0,6); - %ticks - \foreach \x in {0,2000,...,10000} - \draw (\x,1pt) -- (\x,-3pt) - node[anchor=north] {\x}; - \foreach \y in {0,1,...,6} - \draw (1pt,\y) -- (-3pt,\y) - node[anchor=east] {\y}; - %labels - \node[below=0.6cm] at (x axis mid) {\bl{a}s}; - \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; - - %plots - \draw[color=blue] plot[mark=*, mark options={fill=white}] - file {re-internal.data}; - \only<1->{ - \draw[color=red] plot[mark=triangle*, mark options={fill=white} ] - file {re3.data};} - - %legend - \begin{scope}[shift={(2000,4)}] - \draw[color=blue] (0,0) -- - plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala Internal}; - \only<1->{ - \draw[yshift=\baselineskip, color=red] (0,0) -- - plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small Scala V3 with explicit $\_\{\_\}$};} - \end{scope} -\end{tikzpicture} - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} - \\[-8mm] - \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ - & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ - & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ - \bl{der c 
(r$\{n\}$)} & \bl{$\dn$} & \bl{if $n = 0$ then $\varnothing$}\\ - & & \bl{else (der c r) $\cdot$ r$\{n - 1\}$} - \end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -\newcommand{\qq}{\mbox{\texttt{"}}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}CFGs\end{tabular}} - -A \alert{context-free} grammar (CFG) \bl{$G$} consists of: - -\begin{itemize} -\item a finite set of nonterminal symbols (upper case) -\item a finite terminal symbols or tokens (lower case) -\item a start symbol (which must be a nonterminal) -\item a set of rules -\begin{center} -\bl{$A \rightarrow \text{rhs}$} -\end{center} - -where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause - -We can also allow rules -\begin{center} -\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} -\end{center} -\end{itemize} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}A CFG Derivation\end{tabular}} - -\begin{enumerate} -\item Begin with a string with only the start symbol \bl{$S$}\bigskip -\item Replace any non-terminal \bl{$X$} in the string by the right-hand side of some production \bl{$X \rightarrow \text{rhs}$}\bigskip -\item Repeat 2 until there are no non-terminals -\end{enumerate} - -\begin{center} -\bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Language of a CFG\end{tabular}} - -Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. 
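% The der clauses for sequences and for the bounded repetition r{n} shown on the earlier
% slides extend directly to a complete matcher. A Scala sketch (the constructor names are
% my own choice, not necessarily those of the re1/re2/re3 files benchmarked above):
%
%   abstract class Rexp
%   case object ZERO extends Rexp                      // matches nothing
%   case object ONE extends Rexp                       // matches only the empty string
%   case class CHAR(c: Char) extends Rexp
%   case class ALT(r1: Rexp, r2: Rexp) extends Rexp
%   case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
%   case class STAR(r: Rexp) extends Rexp
%   case class NTIMES(r: Rexp, n: Int) extends Rexp    // the explicit r{n}
%
%   def nullable(r: Rexp): Boolean = r match {
%     case ZERO => false
%     case ONE => true
%     case CHAR(_) => false
%     case ALT(r1, r2) => nullable(r1) || nullable(r2)
%     case SEQ(r1, r2) => nullable(r1) && nullable(r2)
%     case STAR(_) => true
%     case NTIMES(r1, n) => if (n == 0) true else nullable(r1)
%   }
%
%   def der(c: Char, r: Rexp): Rexp = r match {
%     case ZERO => ZERO
%     case ONE => ZERO
%     case CHAR(d) => if (c == d) ONE else ZERO
%     case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
%     case SEQ(r1, r2) =>
%       if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
%       else SEQ(der(c, r1), r2)
%     case STAR(r1) => SEQ(der(c, r1), STAR(r1))
%     case NTIMES(r1, n) =>
%       if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1))
%   }
%
%   // a string matches iff the iterated derivative is nullable
%   def matcher(r: Rexp, s: String): Boolean =
%     nullable(s.foldLeft(r)((r1, c) => der(c, r1)))
%
% Without an additional simplification step (removing ZERO/ONE and duplicate alternatives)
% the derivatives grow quickly; that step is what the ``Scala V2 with simplifications''
% curve refers to.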
-Then the language \bl{$L(G)$} is: - -\begin{center} -\bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$} -\end{center}\pause - -\begin{itemize} -\item Terminals are so-called because there are no rules for replacing them -\item Once generated, terminals are ``permanent'' -\item Terminals ought to be tokens of the language (at least in this course) -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Arithmetic Expressions\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $num\_token$ \\ -$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ -$E$ & $\rightarrow$ & $( \cdot E \cdot )$ -\end{tabular}} -\end{center}\pause\bigskip - -A CFG is \alert{left-recursive} if it has a nonterminal \bl{$E$} such -that \bl{$E \rightarrow^+ E\cdot \ldots$} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Parse Trees\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\ -$F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\ -$T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\ -\end{tabular}} -\end{center} - -\begin{center} -\begin{tikzpicture}[level distance=8mm, blue] - \node {$E$} - child {node {$F$} - child {node {$T$} - child {node {(\,$E$\,)} - child {node{$F$ *{} $F$} - child {node {$T$} child {node {2}}} - child {node {$T$} child {node {3}}} - } - } - } - child {node {+}} - child {node {$T$} - child {node {(\,$E$\,)} - child {node {$F$} - child {node {$T$ +{} $T$} - child {node {3}} - child {node {4}} - } - }} - }}; -\end{tikzpicture} -\end{center} - -\begin{textblock}{5}(1, 6.5) -\bl{\texttt{(2*3)+(3+4)}} -\end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}} - -A CFG is \alert{ambiguous} if there is a string that has at least parse trees. 
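% For example, with the grammar below the string 1 + 2 * 3 has two distinct parse trees,
% corresponding to the two leftmost derivations
%
%   E => E * E => E + E * E => ... => (1 + 2) * 3
%   E => E + E => 1 + E => 1 + E * E => ... => 1 + (2 * 3)
%
% so some string has more than one parse tree, which is precisely what makes the grammar
% ambiguous.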
- - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $num\_token$ \\ -$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ -$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ -$E$ & $\rightarrow$ & $( \cdot E \cdot )$ -\end{tabular}} -\end{center} - -\bl{\texttt{1 + 2 * 3 + 4}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Dangling Else\end{tabular}} - -Another ambiguous grammar:\bigskip - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & if $E$ then $E$\\ - & $|$ & if $E$ then $E$ else $E$ \\ - & $|$ & id -\end{tabular}} -\end{center}\bigskip - -\bl{\texttt{if a then if x then y else c}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides08.pdf Binary file slides08.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides08.tex --- a/slides08.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,676 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usetikzlibrary{plotmarks} -\usepackage{graphicx} - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -% beamer stuff -\renewcommand{\slidecaption}{AFL 08, King's College London, 21.~November 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - - -% The data files, written on the first run. 
-\begin{filecontents}{s-grammar1.data} -1 0.01152 -51 0.07973 -101 0.09726 -151 0.09320 -201 0.10010 -251 0.16997 -301 0.26662 -351 0.46118 -401 0.62516 -451 0.87247 -501 1.16334 -551 1.71152 -601 2.10958 -651 2.44360 -701 2.98488 -751 3.50326 -801 4.11036 -851 4.93394 -901 5.77465 -951 7.39123 -\end{filecontents} - -\begin{filecontents}{s-grammar2.data} -1 0.01280 -2 0.00064 -3 0.00173 -4 0.00355 -5 0.00965 -6 0.02674 -7 0.06953 -8 0.11166 -9 0.18707 -10 0.09189 -11 0.12724 -12 0.24337 -13 0.59304 -14 1.53594 -15 4.01195 -16 10.73582 -17 29.51587 -#18 73.14163 -\end{filecontents} - - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (8)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - - -\end{frame}} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Building a ``Web Browser''\end{tabular}} - -Using a lexer: assume the following regular expressions - -\begin{center} -\bl{\begin{tabular}{lcl} -$SY\!M$ & $\dn$ & $(\text{a}..\text{zA}..\text{Z0}..\text{9}..)$\\ -$W\!O\!RD$ & $\dn$ & $SY\!M^+$\\ -$BT\!AG$ & $\dn$ & $<\!W\!O\!RD\!>$\\ -$ET\!AG$ & $\dn$ & $<\!/W\!O\!RD\!>$\\ -$W\!HIT\!E$ & $\dn$ & $\texttt{" "} + \texttt{"}\slash{}n\texttt{"}$\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} - -\begin{itemize} -\item the text should be formatted consistently up to a specified width, say 60 characters -\item potential linebreaks are inserted by the formatter (not the input) -\item repeated whitespaces are ``condensed'' to a single whitepace -\item \bl{$<\!p\!>$} \bl{$<\!\slash{}p\!>$} start/end paragraph -\item \bl{$<\!b\!>$} \bl{$<\!\slash{}b\!>$} start/end bold -\item \bl{$<\!red\!>$} \bl{$<\!\slash{}red\!>$} start/end red (cyan, etc) - - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} - -The lexer cannot prevent errors like - -\begin{center} -\bl{$<\!b\!>$} \ldots \bl{$<\!p\!>$} \ldots \bl{$<\!\slash{}b\!>$} \ldots \bl{$<\!\slash{}p\!>$} -\end{center} - -or - -\begin{center} -\bl{$<\!\slash{}b\!>$} \ldots \bl{$<\!b\!>$} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Parser Combinators\end{tabular}} - -Parser combinators: \bigskip - -\begin{minipage}{1.1\textwidth} -\begin{center} -\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} -$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ -\end{center} -\end{minipage}\bigskip - -\begin{itemize} -\item sequencing -\item alternative 
-\item semantic action -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Alternative parser (code \bl{$p\;||\;q$})\bigskip - -\begin{itemize} -\item apply \bl{$p$} and also \bl{$q$}; then combine the outputs -\end{itemize} - -\begin{center} -\large \bl{$p(\text{input}) \cup q(\text{input})$} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Sequence parser (code \bl{$p\sim q$})\bigskip - -\begin{itemize} -\item apply first \bl{$p$} producing a set of pairs -\item then apply \bl{$q$} to the unparsed parts -\item then combine the results:\\ \mbox{}\;\;((output$_1$, output$_2$), unparsed part) -\end{itemize} - -\begin{center} -\begin{tabular}{l} -\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] -\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] -\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} -\end{tabular} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Function parser (code \bl{$p \Longrightarrow f$})\bigskip - -\begin{itemize} -\item apply \bl{$p$} producing a set of pairs -\item then apply the function \bl{$f$} to each first component -\end{itemize} - -\begin{center} -\begin{tabular}{l} -\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} -\end{tabular} -\end{center}\bigskip\bigskip\pause - -\bl{$f$} is the semantic action (``what to do with the parsed input'') - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Token parser:\bigskip - -\begin{itemize} -\item if the input is - -\begin{center} -\large \bl{$tok_1:: tok_2 :: \ldots :: tok_n$} -\end{center} - -then return - -\begin{center} -\large \bl{$\{(tok_1,\; tok_2 :: \ldots :: tok_n)\}$} -\end{center} - -or - -\begin{center} -\large \bl{$\{\}$} -\end{center} - -if \bl{$tok_1$} is not the right token we are looking for -\end{itemize} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Number-Token parser:\bigskip - -\begin{itemize} -\item if the input is - -\begin{center} -\large \bl{$num\_tok(42):: tok_2 :: \ldots :: tok_n$} -\end{center} - -then return - -\begin{center} -\large \bl{$\{(42,\; tok_2 :: \ldots :: tok_n)\}$} -\end{center} - -or - -\begin{center} -\large \bl{$\{\}$} -\end{center} - -if \bl{$tok_1$} is not the right token we are looking for -\end{itemize}\pause - -\begin{center} -list of tokens \bl{$\Rightarrow$} set of (\alert{int}, list of tokens) -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{itemize} -\item if the input is - -\begin{center} -\begin{tabular}{l} -\large \bl{$num\_tok(42)::$}\\ -\hspace{7mm}\large \bl{$num\_tok(3) ::$}\\ -\hspace{14mm}\large \bl{$tok_3 :: \ldots :: tok_n$} -\end{tabular} -\end{center} - -and the parser is - -\begin{center} -\bl{$ntp \sim ntp$} -\end{center} - -the successful 
output will be - -\begin{center} -\large \bl{$\{((42, 3),\; tok_2 :: \ldots :: tok_n)\}$} -\end{center}\pause - -Now we can form -\begin{center} -\bl{$(ntp \sim ntp) \Longrightarrow f$} -\end{center} - -where \bl{$f$} is the semantic action (``what to do with the pair'') - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} - -Addition - -\begin{center} -\bl{$T \sim + \sim E \Longrightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} -\end{center}\pause - -Multiplication - -\begin{center} -\bl{$F \sim * \sim T \Longrightarrow f((x,y), z) \Rightarrow x * z$} -\end{center}\pause - -Parenthesis - -\begin{center} -\bl{$\text{(} \sim E \sim \text{)} \Longrightarrow f((x,y), z) \Rightarrow y$} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Types of Parsers\end{tabular}} - -\begin{itemize} -\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, -then \bl{$p \sim q$} returns results of type - -\begin{center} -\bl{$T \times S$} -\end{center}\pause - -\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, -and \bl{$p \;||\; q$} returns results of type - -\begin{center} -\bl{$T$} -\end{center}\pause - -\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from -\bl{$T$} to \bl{$S$}, then -\bl{$p \Longrightarrow f$} returns results of type - -\begin{center} -\bl{$S$} -\end{center} - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Input Types of Parsers\end{tabular}} - -\begin{itemize} -\item input: \alert{list of tokens} -\item output: set of (output\_type, \alert{list of tokens}) -\end{itemize}\bigskip\pause - -actually it can be any input type as long as it is a kind of sequence -(for example a string) - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Scannerless Parsers\end{tabular}} - -\begin{itemize} -\item input: \alert{string} -\item output: set of (output\_type, \alert{string}) -\end{itemize}\bigskip - -but lexers are better when whitespaces or comments need to be filtered out - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Successful Parses\end{tabular}} - -\begin{itemize} -\item input: string -\item output: \alert{set of} (output\_type, string) -\end{itemize}\bigskip - -a parse is successful whenever the input has been -fully ``consumed'' (that is the second component is empty) - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -{\lstset{language=Scala}\fontsize{10}{12}\selectfont 
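% app7.scala itself is not included in this patch. The following is only a sketch of how
% the three combinators described above (alternative, sequence, semantic action) might
% look in Scala; the class and method names are illustrative, not the course's actual file.
% The parser arguments are taken by name so that recursive grammars can be written without
% looping at construction time.
%
%   // a parser maps an input string to a set of (result, unparsed rest) pairs
%   abstract class Parser[T] {
%     def parse(in: String): Set[(T, String)]
%     def parseAll(in: String): Set[T] =                 // successful parses only
%       for ((out, rest) <- parse(in) if rest.isEmpty) yield out
%   }
%
%   // alternative: run both parsers and take the union of the results
%   class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
%     def parse(in: String): Set[(T, String)] = p.parse(in) ++ q.parse(in)
%   }
%
%   // sequence: run p, then run q on whatever p left unparsed
%   class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
%     def parse(in: String): Set[((T, S), String)] =
%       for ((o1, u1) <- p.parse(in); (o2, u2) <- q.parse(u1)) yield ((o1, o2), u2)
%   }
%
%   // semantic action: apply f to every successful result of p
%   class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
%     def parse(in: String): Set[(S, String)] =
%       for ((o, u) <- p.parse(in)) yield (f(o), u)
%   }
%
%   // a "token" parser for a single character
%   case class CharParser(c: Char) extends Parser[Char] {
%     def parse(in: String): Set[(Char, String)] =
%       if (in.nonEmpty && in.head == c) Set((c, in.tail)) else Set()
%   }
%
% The slides' notation p ~ q, p || q and p ==> f corresponds to SeqParser, AltParser and
% FunParser here.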
-\texttt{\lstinputlisting{app7.scala}}} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -{\lstset{language=Scala}\fontsize{10}{12}\selectfont -\texttt{\lstinputlisting{app7.scala}}} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -{\lstset{language=Scala}\fontsize{10}{12}\selectfont -\texttt{\lstinputlisting{app8.scala}}} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Two Grammars\end{tabular}} - -Which languages are recognised by the following two grammars? - -\begin{center} -\bl{\begin{tabular}{lcl} -$S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ - & $|$ & $\epsilon$ -\end{tabular}} -\end{center}\bigskip - -\begin{center} -\bl{\begin{tabular}{lcl} -$U$ & $\rightarrow$ & $1 \cdot U$\\ - & $|$ & $\epsilon$ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Ambiguous Grammars\end{tabular}} - -\mbox{}\\[-25mm]\mbox{} - -\begin{center} -\begin{tikzpicture}[y=.2cm, x=.009cm] - %axis - \draw (0,0) -- coordinate (x axis mid) (1000,0); - \draw (0,0) -- coordinate (y axis mid) (0,30); - %ticks - \foreach \x in {0, 20, 100, 200,...,1000} - \draw (\x,1pt) -- (\x,-3pt) - node[anchor=north] {\small \x}; - \foreach \y in {0,5,...,30} - \draw (1pt,\y) -- (-3pt,\y) - node[anchor=east] {\small\y}; - %labels - \node[below=0.6cm] at (x axis mid) {\bl{1}s}; - \node[rotate=90, left=1.2cm] at (y axis mid) {secs}; - - %plots - \draw[color=blue] plot[mark=*, mark options={fill=white}] - file {s-grammar1.data}; - \only<2->{\draw[color=red] plot[mark=triangle*, mark options={fill=white} ] - file {s-grammar2.data};} - %legend - \begin{scope}[shift={(400,20)}] - \draw[color=blue] (0,0) -- - plot[mark=*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small unambiguous}; - \only<2->{\draw[yshift=\baselineskip, color=red] (0,0) -- - plot[mark=triangle*, mark options={fill=white}] (0.25,0) -- (0.5,0) - node[right]{\small ambiguous};} - \end{scope} -\end{tikzpicture} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}What about Left-Recursion?\end{tabular}} - -\begin{itemize} -\item we record when we recursively called a parser\bigskip -\item whenever the is a recursion, the parser must have consumed something --- so -we can decrease the input string/list of token by one (at the end) -\end{itemize} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}While-Language\end{tabular}} - - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$Stmt$ & $\rightarrow$ & $\text{skip}$\\ - & $|$ & $Id := AExp$\\ - & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ - & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ -$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; 
Stmts$\\ - & $|$ & $Stmt$\medskip\\ -$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ - & $|$ & $Stmt$\medskip\\ -$AExp$ & $\rightarrow$ & \ldots\\ -$BExp$ & $\rightarrow$ & \ldots\\ -\end{tabular}} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}An Interpreter\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{l} -$\{$\\ -\;\;$x := 5 \text{;}$\\ -\;\;$y := x * 3\text{;}$\\ -\;\;$y := x * 4\text{;}$\\ -\;\;$x := u * 3$\\ -$\}$ -\end{tabular}} -\end{center} - -\begin{itemize} -\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause -\item \bl{\text{eval}(stmt, env)} -\end{itemize} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides09.pdf Binary file slides09.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides09.tex --- a/slides09.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,853 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usetikzlibrary{plotmarks} -\usepackage{graphicx} -\usepackage{pgfplots} - - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{while}{ - morekeywords={if,then,else,while,do,true,false,write}, - otherkeywords={=,!=,:=,<,>,;}, - sensitive=true, - morecomment=[n]{/*}{*/}, -} - - -\lstset{language=While, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - 
commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - - -% beamer stuff -\renewcommand{\slidecaption}{AFL 09, King's College London, 28.~November 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - - -% The data files, written on the first run. -\begin{filecontents}{compiled.data} -%1 0.234146 -%5000 0.227539 -%10000 0.280748 -50000 1.087897 -100000 3.713165 -250000 21.6624545 -500000 85.872613 -750000 203.6408015 -1000000 345.736574 -\end{filecontents} - -\begin{filecontents}{interpreted.data} -%1 0.00503 -200 1.005863 -400 7.8296765 -500 15.43106 -600 27.2321885 -800 65.249271 -1000 135.4493445 -1200 232.134097 -1400 382.527227 -\end{filecontents} - -\begin{filecontents}{interpreted2.data} -%1 0.00503 -200 1.005863 -400 7.8296765 -600 27.2321885 -800 65.249271 -1000 135.4493445 -1200 232.134097 -1400 382.527227 -\end{filecontents} - -\begin{filecontents}{compiled2.data} -200 0.222058 -400 0.215204 -600 0.202031 -800 0.21986 -1000 0.205934 -1200 0.1981615 -1400 0.207116 -\end{filecontents} - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (9)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Imagine the following situation: You talk to somebody -and you find out that she/he has implemented a compiler.\smallskip - -What is your reaction? 
Check all that apply.\bigskip\pause - - \begin{itemize} - \item[$\Box$] You think she/he is God - \item[$\Box$] \"Uberhacker - \item[$\Box$] superhuman - \item[$\Box$] wizard - \item[$\Box$] supremo - \end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}While-Language\end{tabular}} - - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$Stmt$ & $\rightarrow$ & $\text{skip}$\\ - & $|$ & $Id := AExp$\\ - & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ - & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\\ - & $|$ & $\alert{\text{write}\; Id}$\medskip\\ -$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ - & $|$ & $Stmt$\medskip\\ -$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ - & $|$ & $Stmt$\medskip\\ -$AExp$ & $\rightarrow$ & \ldots\\ -$BExp$ & $\rightarrow$ & \ldots\\ -\end{tabular}} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Fibonacci Numbers\end{tabular}} - -\mbox{}\\[-18mm]\mbox{} - -{\lstset{language=While}\fontsize{10}{12}\selectfont -\texttt{\lstinputlisting{fib.while}}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreter\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$\text{eval}(n, E)$ & $\dn$ & $n$\\ -$\text{eval}(x, E)$ & $\dn$ & $E(x)$ \;\;\;\textcolor{black}{lookup \bl{$x$} in \bl{$E$}}\\ -$\text{eval}(a_1 + a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) + \text{eval}(a_2, E)$\\ -$\text{eval}(a_1 - a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) - \text{eval}(a_2, E)$\\ -$\text{eval}(a_1 * a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) * \text{eval}(a_2, E)$\bigskip\\ -$\text{eval}(a_1 = a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) = \text{eval}(a_2, E)$\\ -$\text{eval}(a_1\,!\!= a_2, E)$ & $\dn$ & $\neg(\text{eval}(a_1, E) = \text{eval}(a_2, E))$\\ -$\text{eval}(a_1 < a_2, E)$ & $\dn$ & $\text{eval}(a_1, E) < \text{eval}(a_2, E)$\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreter (2)\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$\text{eval}(\text{skip}, E)$ & $\dn$ & $E$\\ -$\text{eval}(x:=a, E)$ & $\dn$ & \bl{$E(x \mapsto \text{eval}(a, E))$}\\ -\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{if}\;b\;\text{then}\;cs_1\;\text{else}\;cs_2 , E) \dn$}\\ -\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)\;\text{then}\; -\text{eval}(cs_1,E)$}\\ -\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\phantom{\text{if}\;\text{eval}(b,E)\;}\text{else}\;\text{eval}(cs_2,E)$}\\ -\multicolumn{3}{@{}l@{}}{$\text{eval}(\text{while}\;b\;\text{do}\;cs, E) \dn$}\\ -\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{if}\;\text{eval}(b,E)$}\\ -\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{then}\; -\text{eval}(\text{while}\;b\;\text{do}\;cs, \text{eval}(cs,E))$}\\ -\multicolumn{3}{@{}l@{}}{\hspace{2cm}$\text{else}\; E$}\\ -$\text{eval}(\text{write}\; x, E)$ & $\dn$ & $\{\;\text{println}(E(x))\; ;\;E\;\}$\\ -\end{tabular}} -\end{center} - -\end{frame}} 
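% The eval clauses above transcribe almost verbatim into Scala. A sketch, written as
% top-level definitions (Scala 3 / worksheet style) and assuming a small AST whose
% constructor names are my own, not necessarily the course's:
%
%   abstract class AExp
%   case class Num(n: Int) extends AExp
%   case class Var(x: String) extends AExp
%   case class Aop(op: String, a1: AExp, a2: AExp) extends AExp   // +, -, *
%
%   abstract class BExp
%   case class Bop(op: String, a1: AExp, a2: AExp) extends BExp   // =, !=, <
%
%   type Env = Map[String, Int]
%
%   def eval_a(a: AExp, env: Env): Int = a match {
%     case Num(n)           => n
%     case Var(x)           => env(x)                      // lookup x in the environment
%     case Aop("+", a1, a2) => eval_a(a1, env) + eval_a(a2, env)
%     case Aop("-", a1, a2) => eval_a(a1, env) - eval_a(a2, env)
%     case Aop("*", a1, a2) => eval_a(a1, env) * eval_a(a2, env)
%   }
%
%   def eval_b(b: BExp, env: Env): Boolean = b match {
%     case Bop("=", a1, a2)  => eval_a(a1, env) == eval_a(a2, env)
%     case Bop("!=", a1, a2) => eval_a(a1, env) != eval_a(a2, env)
%     case Bop("<", a1, a2)  => eval_a(a1, env) < eval_a(a2, env)
%   }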
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Test Program\end{tabular}} - -\mbox{}\\[-18mm]\mbox{} - -{\lstset{language=While}\fontsize{10}{12}\selectfont -\texttt{\lstinputlisting{loops.while}}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Interpreted Code\end{tabular}} - -\begin{center} -\begin{tikzpicture} -\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] -\addplot+[smooth] file {interpreted.data}; -\end{axis} -\end{tikzpicture} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Java Virtual Machine\end{tabular}} - -\begin{itemize} -\item introduced in 1995 -\item is a stack-based VM (like Postscript, CLR of .Net) -\item contains a JIT compiler -\item many languages take advantage of JVM's infrastructure (JRE) -\item is garbage collected $\Rightarrow$ no buffer overflows -\item some languages compiled to the JVM: Scala, Clojure\ldots -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -{\Large\bl{1 + 2}} - -\begin{center} -\bl{\begin{tabular}{l} -ldc 1\\ -ldc 2\\ -iadd\\ -\end{tabular}} -\end{center}\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -{\Large\bl{1 + 2 + 3}} - -\begin{center} -\bl{\begin{tabular}{l} -ldc 1\\ -ldc 2\\ -iadd\\ -ldc 3\\ -iadd\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -{\Large\bl{1 + (2 + 3)}} - -\begin{center} -\bl{\begin{tabular}{l} -ldc 1\\ -ldc 2\\ -ldc 3\\ -iadd\\ -iadd\\ -\end{tabular}} -\end{center}\bigskip\pause -\vfill - -\bl{dadd, fadd, ladd, \ldots} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$\text{compile}(n)$ & $\dn$ & $\text{ldc}\;n$\\ -$\text{compile}(a_1 + a_2)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\;\text{compile}(a_2)\;@\; \text{iadd}$}\smallskip\\ -$\text{compile}(a_1 - a_2)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\; \text{compile}(a_2)\;@\; \text{isub}$}\smallskip\\ -$\text{compile}(a_1 * a_2)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1) \;@\; \text{compile}(a_2)\;@\; \text{imul}$}\smallskip\\ -\end{tabular}} -\end{center}\pause - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ 
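% Continuing the sketch from the interpreter slides a few frames back: the statement
% clauses, with the environment threaded through. The While case re-evaluates the whole
% loop on the updated environment, exactly as in the eval equations on the slide.
%
%   abstract class Stmt
%   case object Skip extends Stmt
%   case class Assign(x: String, a: AExp) extends Stmt
%   case class If(b: BExp, bl1: List[Stmt], bl2: List[Stmt]) extends Stmt
%   case class While(b: BExp, bl: List[Stmt]) extends Stmt
%   case class Write(x: String) extends Stmt
%
%   def eval_stmt(s: Stmt, env: Env): Env = s match {
%     case Skip            => env
%     case Assign(x, a)    => env + (x -> eval_a(a, env))
%     case If(b, bl1, bl2) => if (eval_b(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
%     case While(b, bl)    =>
%       if (eval_b(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) else env
%     case Write(x)        => { println(env(x)); env }
%   }
%
%   def eval_bl(bl: List[Stmt], env: Env): Env =
%     bl.foldLeft(env)((e, s) => eval_stmt(s, e))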
-\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -{\Large\bl{1 + 2 * 3 + (4 - 3)}} - -\begin{center} -\bl{\begin{tabular}{l} -ldc 1\\ -ldc 2\\ -ldc 3\\ -imul\\ -ldc 4\\ -ldc 3\\ -isub\\ -iadd\\ -iadd\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Variables\end{tabular}} - -{\Large\bl{$x := 5 + y * 2$}}\bigskip\pause - -\begin{itemize} -\item lookup: \bl{$\text{iload}\; index$} -\item store: \bl{$\text{istore}\; index$} -\end{itemize}\bigskip\pause - -while compilating we have to maintain a map between our identifiers and the -Java bytecode indices - -\begin{center} -\bl{$\text{compile}(a, E)$} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -\begin{center} -\bl{\begin{tabular}{@{}lcl@{}} -$\text{compile}(n, E)$ & $\dn$ & $\text{ldc}\;n$\\ -$\text{compile}(a_1 + a_2, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\;\text{compile}(a_2. E)\;@\; \text{iadd}$}\smallskip\\ -$\text{compile}(a_1 - a_2, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{isub}$}\smallskip\\ -$\text{compile}(a_1 * a_2, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\qquad\text{compile}(a_1, E) \;@\; \text{compile}(a_2, E)\;@\; \text{imul}$}\bigskip\\ -$\text{compile}(x, E)$ & $\dn$ & $\text{iload}\;E(x)$\\ -\end{tabular}} -\end{center}\pause - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Statements\end{tabular}} - -We return a list of instructions and an environment for the variables - -\begin{center} -\bl{\begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}} -$\text{compile}(\text{skip}, E)$ & $\dn$ & $(N\!il, E)$\bigskip\\ -$\text{compile}(x := a, E)$ & $\dn$\\ -\multicolumn{3}{l}{$(\text{compile}(a, E) \;@\;\text{istore}\;index, E(x\mapsto index))$}\\ -\end{tabular}} -\end{center}\medskip - -where \bl{$index$} is \bl{$E(x)$} if it is already defined, or if it is not then the largest index not yet seen - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling AExps\end{tabular}} - -{\Large\bl{$x := x + 1$}} - -\begin{center} -\bl{\begin{tabular}{l} -iload $n_x$\\ -ldc 1\\ -iadd\\ -istore $n_x$\\ -\end{tabular}} -\end{center} - -where \bl{$n_x$} is the index corresponding to the variable \bl{$x$} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Ifs\end{tabular}} - -{\Large\bl{$\text{if}\;b\;\text{else}\;cs_1\;\text{then}\;cs_2$}}\bigskip\bigskip - -\onslide<2->{Case }\only<2>{{\bf True}:}\only<3>{{\bf False}:} - -\begin{center} -\begin{tikzpicture}[node distance=2mm and 4mm, - block/.style={rectangle, minimum size=1cm, draw=black, line width=1mm}, - point/.style={rectangle, inner sep=0mm, minimum size=0mm, fill=red}, - skip 
loop/.style={red, line width=1mm, to path={-- ++(0,-10mm) -| (\tikztotarget)}}] -\node (A1) [point] {}; -\node (b) [block, right=of A1] {code of \bl{$b$}}; -\node (A2) [point, right=of b] {}; -\node (cs1) [block, right=of A2] {code of \bl{$cs_1$}}; -\node (A3) [point, right=of cs1] {}; -\node (cs2) [block, right=of A3] {code of \bl{$cs_2$}}; -\node (A4) [point, right=of cs2] {}; - -\only<2>{ -\draw (A1) edge [->, red, line width=1mm] (b); -\draw (b) edge [->, red, line width=1mm] (cs1); -\draw (cs1) edge [->, red, line width=1mm] (A3); -\draw (A3) edge [->,skip loop] (A4); -\node [below=of cs2] {\raisebox{-5mm}{\small{}jump}};} -\only<3>{ -\draw (A1) edge [->, red, line width=1mm] (b); -\draw (b) edge [->, red, line width=1mm] (A2); -\draw (A2) edge [skip loop] (A3); -\draw (A3) edge [->, red, line width=1mm] (cs2); -\draw (cs2) edge [->,red, line width=1mm] (A4); -\node [below=of cs1] {\raisebox{-5mm}{\small{}conditional jump}};} -\end{tikzpicture} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Conditional Jumps\end{tabular}} - -\begin{minipage}{1.1\textwidth} -\begin{itemize} -\item \bl{if\_icmpeq $label$} if two ints are equal, then jump\medskip -\item \bl{if\_icmpne $label$} if two ints aren't equal, then jump\medskip -\item \bl{if\_icmpge $label$} if one int is greater or equal then another, then jump -\item[]\ldots -\end{itemize} -\end{minipage}\pause - - -\begin{center} -\bl{\begin{tabular}{l} -$L_1$:\\ -\hspace{5mm}if\_icmpeq\;$L_2$\\ -\hspace{5mm}iload 1\\ -\hspace{5mm}ldc 1\\ -\hspace{5mm}iadd\\ -\hspace{5mm}if\_icmpeq\;$L_1$\\ -$L_2$: -\end{tabular}} -\end{center} - -\begin{textblock}{3.5}(11,12) -\only<3>{labels must be unique} -\end{textblock} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling BExps\end{tabular}} - -{\Large\bl{$a_1 = a_2$}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$\text{compile}(a_1 = a_2, E, lab)$ & $\dn$\\ -\multicolumn{3}{l}{$\quad\text{compile}(a_1, E) \;@\;\text{compile}(a_2, E)\;@\; \text{if\_icmpne}\;lab$} -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Ifs\end{tabular}} - -{\Large\bl{if $b$ then $cs_1$ else $cs_2$}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$\text{compile}(\text{if}\;b\;\text{then}\; cs_1\;\text{else}\; cs_2, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\quad l_{ifelse}\;$ \textcolor{black}{(fresh label)}}\\ -\multicolumn{3}{l}{$\quad l_{ifend}\;$ \textcolor{black}{(fresh label)}}\\ -\multicolumn{3}{l}{$\quad (is_1, E') = \text{compile}(cs_1, E)$}\\ -\multicolumn{3}{l}{$\quad (is_2, E'') = \text{compile}(cs_2, E')$}\\ -\multicolumn{3}{l}{$\quad(\text{compile}(b, E, l_{ifelse})$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;is_1$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\; \text{goto}\;l_{ifend}$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{ifelse}:$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;is_2$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{ifend}:, E'')$}\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - 
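% A Scala sketch of the compile clauses on the preceding slides, emitting Jasmin-style
% assembler instructions as strings. The label counter and the way new variable indices
% are allocated are assumptions of this sketch; the While and Write cases follow the
% clauses on the next slides.
%
%   type CEnv = Map[String, Int]          // variable name -> JVM local-variable index
%
%   var labelCounter = -1
%   def fresh(prefix: String): String = { labelCounter += 1; s"${prefix}_$labelCounter" }
%
%   def compile_a(a: AExp, env: CEnv): List[String] = a match {
%     case Num(n)           => List(s"ldc $n")
%     case Var(x)           => List(s"iload ${env(x)}")
%     case Aop("+", a1, a2) => compile_a(a1, env) ++ compile_a(a2, env) ++ List("iadd")
%     case Aop("-", a1, a2) => compile_a(a1, env) ++ compile_a(a2, env) ++ List("isub")
%     case Aop("*", a1, a2) => compile_a(a1, env) ++ compile_a(a2, env) ++ List("imul")
%   }
%
%   // a boolean expression jumps to `jmp` when it evaluates to false
%   def compile_b(b: BExp, env: CEnv, jmp: String): List[String] = b match {
%     case Bop("=", a1, a2) =>
%       compile_a(a1, env) ++ compile_a(a2, env) ++ List(s"if_icmpne $jmp")
%     case Bop("<", a1, a2) =>
%       compile_a(a1, env) ++ compile_a(a2, env) ++ List(s"if_icmpge $jmp")
%   }
%
%   def compile_s(s: Stmt, env: CEnv): (List[String], CEnv) = s match {
%     case Skip => (Nil, env)
%     case Assign(x, a) =>
%       val idx = env.getOrElse(x, env.size)      // reuse the index, or take a fresh one
%       (compile_a(a, env) ++ List(s"istore $idx"), env + (x -> idx))
%     case If(b, bl1, bl2) =>
%       val l_else = fresh("If_else")
%       val l_end  = fresh("If_end")
%       val (is1, env1) = compile_bl(bl1, env)
%       val (is2, env2) = compile_bl(bl2, env1)
%       (compile_b(b, env, l_else) ++ is1 ++ List(s"goto $l_end", s"$l_else:") ++
%          is2 ++ List(s"$l_end:"), env2)
%     // While and Write are analogous (see the clauses on the following slides)
%   }
%
%   def compile_bl(bl: List[Stmt], env: CEnv): (List[String], CEnv) =
%     bl.foldLeft((List[String](), env)) { case ((is, e), s) =>
%       val (is1, e1) = compile_s(s, e)
%       (is ++ is1, e1)
%     }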
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Whiles\end{tabular}} - -{\Large\bl{$\text{while}\;b\;\text{do}\;cs$}}\bigskip\bigskip - -\onslide<2->{Case }\only<2>{{\bf True}:}\only<3>{{\bf False}:} - -\begin{center} -\begin{tikzpicture}[node distance=2mm and 4mm, - block/.style={rectangle, minimum size=1cm, draw=black, line width=1mm}, - point/.style={rectangle, inner sep=0mm, minimum size=0mm, fill=red}, - skip loop/.style={red, line width=1mm, to path={-- ++(0,-10mm) -| (\tikztotarget)}}] -\node (A0) [point, left=of A1] {}; -\node (A1) [point] {}; -\node (b) [block, right=of A1] {code of \bl{$b$}}; -\node (A2) [point, right=of b] {}; -\node (cs1) [block, right=of A2] {code of \bl{$cs$}}; -\node (A3) [point, right=of cs1] {}; -\node (A4) [point, right=of A3] {}; - -\only<2>{ -\draw (A0) edge [->, red, line width=1mm] (b); -\draw (b) edge [->, red, line width=1mm] (cs1); -\draw (cs1) edge [->, red, line width=1mm] (A3); -\draw (A3) edge [->,skip loop] (A1);} -\only<3>{ -\draw (A0) edge [->, red, line width=1mm] (b); -\draw (b) edge [->, red, line width=1mm] (A2); -\draw (A2) edge [skip loop] (A3); -\draw (A3) edge [->, red, line width=1mm] (A4);} -\end{tikzpicture} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Whiles\end{tabular}} - -{\Large\bl{while $b$ do $cs$}} - -\begin{center} -\bl{\begin{tabular}{lcl} -$\text{compile}(\text{while}\; b\; \text{do} \;cs, E)$ & $\dn$\\ -\multicolumn{3}{l}{$\quad l_{wbegin}\;$ \textcolor{black}{(fresh label)}}\\ -\multicolumn{3}{l}{$\quad l_{wend}\;$ \textcolor{black}{(fresh label)}}\\ -\multicolumn{3}{l}{$\quad (is, E') = \text{compile}(cs_1, E)$}\\ -\multicolumn{3}{l}{$\quad(l_{wbegin}:$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;\text{compile}(b, E, l_{wend})$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;is$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\; \text{goto}\;l_{wbegin}$}\\ -\multicolumn{3}{l}{$\quad\phantom{(}@\;l_{wend}:, E')$}\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiling Writes\end{tabular}} - -{\Large\bl{write $x$}} - -\begin{center} -\small\bl{\begin{tabular}{l} -.method public static write(I)V\hspace{1cm}\textcolor{black}{(library function)}\\ -\;\; .limit locals 5 \\ -\;\; .limit stack 5 \\ -\;\; iload 0 \\ -\;\; getstatic java/lang/System/out Ljava/io/PrintStream;\\ -\;\; swap \\ -\;\; invokevirtual java/io/PrintStream/println(I)V \\ -\;\; return \\ -.end method\bigskip\bigskip\\ -% -\normalsize -iload $E(x)$\\ -invokestatic write(I)V\\ -\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\begin{center} -\small\bl{\begin{tabular}{l} -.class public XXX.XXX\\ -.super java/lang/Object\\ -\\ -.method public ()V\\ -\;\; aload\_0\\ -\;\; invokenonvirtual java/lang/Object/()V\\ - \;\; return\\ -.end method\\ -\\ -.method public static main([Ljava/lang/String;)V\\ -\;\; .limit locals 200\\ -\;\; .limit stack 200\\ -\\ - \textcolor{black}{(here comes the compiled code)}\\ -\\ -\;\; return\\ -.end method\\ 
-\end{tabular}} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Next Compiler Phases\end{tabular}} - -\begin{itemize} -\item assembly $\Rightarrow$ byte code (class file) -\item labels $\Rightarrow$ absolute or relative jumps\bigskip\bigskip -\item \texttt{javap} is a disassembler for class files -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiled Code\end{tabular}} - -\begin{center} -\begin{tikzpicture} -\begin{axis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] -\addplot+[smooth] file {compiled.data}; -\end{axis} -\end{tikzpicture} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiled vs.~Interpreted Code\end{tabular}} - -\begin{center} -\begin{tikzpicture} -\begin{loglogaxis}[axis x line=bottom, axis y line=left, xlabel=n, ylabel=secs, legend style=small] -\addplot+[smooth] file {interpreted.data}; -\addplot+[smooth] file {compiled.data}; -\end{loglogaxis} -\end{tikzpicture} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Compiled vs.~Interpreted Code\end{tabular}} - -\begin{center} -\begin{tikzpicture} -\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, - xlabel=n, - enlargelimits=0.05, - ybar interval=0.7, legend style=small] -\addplot file {interpreted2.data}; -\addplot file {compiled2.data}; -%\legend{interpreted, compiled} -\end{axis} -\end{tikzpicture} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}What Next\end{tabular}} - -\begin{itemize} -\item register spilling -\item dead code removal -\item loop optimisations -\item instruction selection -\item type checking -\item concurrency -\item fuzzy testing -\item verification\bigskip\\ - -\item GCC, LLVM, tracing JITs -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: - diff -r e85600529ca5 -r 4794759139ea slides10.pdf Binary file slides10.pdf has changed diff -r e85600529ca5 -r 4794759139ea slides10.tex --- a/slides10.tex Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,383 +0,0 @@ -\documentclass[dvipsnames,14pt,t]{beamer} -\usepackage{beamerthemeplainculight} -\usepackage[T1]{fontenc} -\usepackage[latin1]{inputenc} -\usepackage{mathpartir} -\usepackage[absolute,overlay]{textpos} -\usepackage{ifthen} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{calc} -\usepackage{ulem} -\usepackage{courier} -\usepackage{listings} -\renewcommand{\uline}[1]{#1} -\usetikzlibrary{arrows} -\usetikzlibrary{automata} -\usetikzlibrary{shapes} -\usetikzlibrary{shadows} -\usetikzlibrary{positioning} -\usetikzlibrary{calc} -\usetikzlibrary{plotmarks} 
-\usepackage{graphicx} -\usepackage{pgfplots} - - -\definecolor{javared}{rgb}{0.6,0,0} % for strings -\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments -\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords -\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc - -\lstset{language=Java, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{scala}{ - morekeywords={abstract,case,catch,class,def,% - do,else,extends,false,final,finally,% - for,if,implicit,import,match,mixin,% - new,null,object,override,package,% - private,protected,requires,return,sealed,% - super,this,throw,trait,true,try,% - type,val,var,while,with,yield}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@}, - sensitive=true, - morecomment=[l]{//}, - morecomment=[n]{/*}{*/}, - morestring=[b]", - morestring=[b]', - morestring=[b]""" -} - - -\lstset{language=Scala, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - -\lstdefinelanguage{while}{ - morekeywords={if,then,else,while,do,true,false,write}, - otherkeywords={=,!=,:=,<,>,;}, - sensitive=true, - morecomment=[n]{/*}{*/}, -} - - -\lstset{language=While, - basicstyle=\ttfamily, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} - - -% beamer stuff -\renewcommand{\slidecaption}{AFL 10, King's College London, 5.~December 2012} -\newcommand{\bl}[1]{\textcolor{blue}{#1}} -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - - -% The data files, written on the first run. 
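% The filecontents blocks below hold the (n, seconds) pairs that feed the
% ``Compiled Code'' and ``Compiled vs.~Interpreted Code'' plots later in the
% slides.  Purely as an illustration of how such two-column data is usually
% produced (this is not the measurement setup used for these numbers), a
% hypothetical harness could look as follows; TimingSketch, time and task
% are invented names, and task(n) merely stands in for running a While
% program with parameter n.
%
%   object TimingSketch {
%     // wall-clock time of one evaluation of `task`, in seconds
%     def time(task: => Unit): Double = {
%       val start = System.nanoTime()
%       task
%       (System.nanoTime() - start) / 1e9
%     }
%
%     def main(args: Array[String]): Unit = {
%       def task(n: Int): Unit = {          // placeholder workload
%         var i = n; var acc = 0L
%         while (i > 0) { acc += i; i -= 1 }
%       }
%       for (n <- List(200, 400, 600, 800, 1000, 1200, 1400))
%         println(s"$n ${time(task(n))}")   // same "n secs" format as below
%     }
%   }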
-\begin{filecontents}{compiled.data} -%1 0.234146 -%5000 0.227539 -%10000 0.280748 -50000 1.087897 -100000 3.713165 -250000 21.6624545 -500000 85.872613 -750000 203.6408015 -1000000 345.736574 -\end{filecontents} - -\begin{filecontents}{interpreted.data} -%1 0.00503 -200 1.005863 -400 7.8296765 -500 15.43106 -600 27.2321885 -800 65.249271 -1000 135.4493445 -1200 232.134097 -1400 382.527227 -\end{filecontents} - -\begin{filecontents}{interpreted2.data} -%1 0.00503 -200 1.005863 -400 7.8296765 -600 27.2321885 -800 65.249271 -1000 135.4493445 -1200 232.134097 -1400 382.527227 -\end{filecontents} - -\begin{filecontents}{compiled2.data} -200 0.222058 -400 0.215204 -600 0.202031 -800 0.21986 -1000 0.205934 -1200 0.1981615 -1400 0.207116 -\end{filecontents} - -\begin{document} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<1>[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Automata and \\[-2mm] - \LARGE Formal Languages (10)\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - Of$\!$fice: & S1.27 (1st floor Strand Building)\\ - Slides: & KEATS (also home work is there)\\ - \end{tabular} - \end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\Large\bf -There are more problems, than there are programs.\bigskip\bigskip\pause\\ - -There must be a problem for which there is no program. -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Revision: Proofs} - -\begin{center} -\includegraphics[scale=0.4]{river-stones.jpg} -\end{center} - -\end{frame}} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Subsets} - -\Large -\bl{$A \subseteq B$}\bigskip\bigskip\\ - -\bl{$\forall e.\; e \in A \Rightarrow e \in B$} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Subsets} - -\Large -\bl{$A \subseteq B$} and \bl{$B \subseteq A$}\bigskip - -then \bl{$A = B$} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Injective Function} - -\Large -\bl{f} is an injective function iff \bigskip - -\bl{$\forall x y.\; f(x) = f(y) \Rightarrow x = y$} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Cardinality} - -\Large -\bl{$|A|$} $\dn$ ``how many elements''\bigskip\\ - -\bl{$A \subseteq B \Rightarrow |A| \leq |B|$}\bigskip\\\pause - -if there is an injective function \bl{$f: A \rightarrow B$} then \bl{$|A| \leq |B|$}\ - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Natural Numbers} - -\Large -\bl{$\mathbb{N}$} \bl{$\dn$} \bl{$\{0, 1, 2, 3, .......\}$}\bigskip\pause - -\bl{$A$} is \alert{countable} iff \bl{$|A| \leq |\mathbb{N}|$} - -\end{frame}} 
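% A worked instance of the two definitions just given (an injection
% $f : A \rightarrow B$ witnesses $|A| \leq |B|$, and $A$ is countable iff
% $|A| \leq |\mathbb{N}|$), which also bears on the ``First Question'' of the
% next slide:
%
%   the identity map injects $\mathbb{N} - \{0\}$ into $\mathbb{N}$, so
%   $|\mathbb{N} - \{0\}| \leq |\mathbb{N}|$ and $\mathbb{N} - \{0\}$ is
%   countable; conversely $g(n) \dn n + 1$ is an injection (indeed a
%   bijection) from $\mathbb{N}$ into $\mathbb{N} - \{0\}$, so
%   $|\mathbb{N}| \leq |\mathbb{N} - \{0\}|$ as well.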
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{First Question} - -\Large -\bl{$|\mathbb{N} - \{0\}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip - -\normalsize -\bl{$\geq$} or \bl{$\leq$} or \bl{$=$} -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\Large -\bl{$|\mathbb{N} - \{0, 1\}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\pause - -\bl{$|\mathbb{N} - \mathbb{O}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip - -\normalsize -\bl{$\mathbb{O}$} $\dn$ odd numbers\quad \bl{$\{1,3,5......\}$}\\ \pause -\bl{$\mathbb{E}$} $\dn$ even numbers\quad \bl{$\{0,2,4......\}$}\\ -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\Large -\bl{$|\mathbb{N} \cup \mathbb{-N}| \;\;\;\alert{?}\;\;\; |\mathbb{N}| $}\bigskip\bigskip - - -\normalsize -\bl{$\mathbb{\phantom{-}N}$} $\dn$ positive numbers\quad \bl{$\{0,1,2,3,......\}$}\\ -\bl{$\mathbb{-N}$} $\dn$ negative numbers\quad \bl{$\{0,-1,-2,-3,......\}$}\\ -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -\Large -\bl{$A$} is \alert{countable} if there exists an injective \bl{$f : A \rightarrow \mathbb{N}$}\bigskip - -\bl{$A$} is \alert{uncountable} if there does not exist an injective \bl{$f : A \rightarrow \mathbb{N}$}\bigskip\bigskip - - -countable: \bl{$|A| \leq |\mathbb{N}|$}\\ -uncountable: \bl{$|A| > |\mathbb{N}|$}\pause\bigskip - - -Does there exist such an \bl{$A$} ? - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Halting Problem} - -\large -Assume a program \bl{$H$} that decides for all programs \bl{$A$} and all -input data \bl{$D$} whether\bigskip - -\begin{itemize} -\item \bl{$H(A, D) \dn 1$} iff \bl{$A(D)$} terminates -\item \bl{$H(A, D) \dn 0$} otherwise -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Halting Problem (2)} - -\large -Given such a program \bl{$H$} define the following program \bl{$C$}: -for all programs \bl{$A$}\bigskip - -\begin{itemize} -\item \bl{$C(A) \dn 0$} iff \bl{$H(A, A) = 0$} -\item \bl{$C(A) \dn$ loops} otherwise -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Contradiction} - - -\bl{$H(C, C)$} is either \bl{$0$} or \bl{$1$}. - -\begin{itemize} -\item \bl{$H(C, C) = 1$} $\stackrel{\text{def}\,H}{\Rightarrow}$ \bl{$C(C)\downarrow$} $\stackrel{\text{def}\,C}{\Rightarrow}$ \bl{$H(C, C)=0$} -\item \bl{$H(C, C) = 0$} $\stackrel{\text{def}\,H}{\Rightarrow}$ \bl{$C(C)$} loops $\stackrel{\text{def}\,C}{\Rightarrow}$\\ -\hspace{7cm}\bl{$H(C, C)=1$} -\end{itemize} - -Contradiction in both cases. So \bl{$H$} cannot exist. 
- -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: -
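% The contradiction on the ``Halting Problem'' slides can also be read as a
% small program.  A purely illustrative Scala sketch: H below is a
% hypothetical placeholder (the slides show no such total decider can
% exist), Prog is an invented type abbreviation, and C is written exactly
% as defined on the ``Halting Problem (2)'' slide.
%
%   object HaltingSketch {
%     type Prog = String                     // programs given as source text
%
%     // hypothetical: 1 iff running program `a` on input `d` terminates
%     def H(a: Prog, d: Prog): Int =
%       sys.error("no such total decider exists -- that is the point")
%
%     def C(a: Prog): Int =
%       if (H(a, a) == 0) 0                  // C(a) = 0    iff H(a, a) = 0
%       else { while (true) {}; 1 }          // C(a) loops  otherwise
%   }
%
% Feeding C (as text) to itself replays the case analysis of the
% ``Contradiction'' slide: H(C, C) = 1 forces C(C) to terminate and hence
% H(C, C) = 0, while H(C, C) = 0 forces C(C) to loop and hence H(C, C) = 1.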