updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Fri, 30 Oct 2020 01:45:03 +0000
changeset 798 aaf0bd0a211d
parent 797 ddcb616e036a
child 799 85267be9a5ed
updated
graphics.sty
handouts/ho05.pdf
handouts/ho05.tex
slides/slides05.pdf
slides/slides05.tex
style.sty
--- a/graphics.sty	Thu Oct 29 00:25:20 2020 +0000
+++ b/graphics.sty	Fri Oct 30 01:45:03 2020 +0000
@@ -22,3 +22,30 @@
 \bgroup\begin{minipage}{#1}\raggedright{}}
 {\end{minipage}\egroup;%
 \end{tikzpicture}\bigskip}
+
+
+%%% for trees
+%% http://anorien.csc.warwick.ac.uk/mirrors/CTAN/graphics/pgf/contrib/forest/forest.pdf
+
+\newcommand\grid[1]{%
+\begin{tikzpicture}[baseline=(char.base)]
+  \path[use as bounding box]
+    (0,0) rectangle (1em,1em);
+  \draw[red!50, fill=red!20]
+    (0,0) rectangle (1em,1em);
+  \node[inner sep=1pt,anchor=base west]
+    (char) at (0em,\gridraiseamount) {#1};
+\end{tikzpicture}}
+\newcommand\gridraiseamount{0.12em}
+
+\makeatletter
+\newcommand\Grid[1]{%
+  \@tfor\z:=#1\do{\grid{\z}}}
+\makeatother	
+
+\newcommand\Vspace[1][.3em]{%
+  \mbox{\kern.06em\vrule height.3ex}%
+  \vbox{\hrule width#1}%
+  \hbox{\vrule height.3ex}}
+
+\def\VS{\Vspace[0.6em]}
\ No newline at end of file
Binary file handouts/ho05.pdf has changed
--- a/handouts/ho05.tex	Thu Oct 29 00:25:20 2020 +0000
+++ b/handouts/ho05.tex	Fri Oct 30 01:45:03 2020 +0000
@@ -3,6 +3,7 @@
 \usepackage{../style}
 \usepackage{../langs}
 \usepackage{../grammar}
+\usepackage{../graphics}
 
 % epsilon and left-recursion elimination
 % http://www.mollypages.org/page/grammar/index.mp
@@ -419,7 +420,7 @@
 
 \noindent To follow the general scheme of the transfromation,
 the $\alpha$ is either is either $0$ or $1$, and the $\beta$ happens
-to be empty. SO we need to generate new rules for the form 
+to be empty. So we need to generate new rules for the form 
 \mbox{$\meta{A} := \alpha\cdot\beta$}, which in our particular 
 example means we obtain
 
@@ -509,9 +510,9 @@
 is recognised by the grammar. The CYK algorithm starts with the
 following triangular data structure.
 
-\begin{figure}[t]
+%%\begin{figure}[t]
 \begin{center}
-  \begin{tikzpicture}[scale=0.8,line width=0.8mm]
+  \begin{tikzpicture}[scale=0.7,line width=0.8mm]
   \draw (-2,0) -- (4,0);
   \draw (-2,1) -- (4,1);
   \draw (-2,2) -- (3,2);
@@ -542,6 +543,12 @@
   \draw ( 2.5,0.5) node {$N$}; 
   \draw ( 3.5,0.5) node {$N,V$};
 
+%  \draw (-1.5,1.5) node {\small{}a}; 
+%  \draw (-0.5,1.5) node {\small{}b}; 
+%  \draw ( 0.5,1.5) node {\small{}c}; 
+%  \draw ( 1.5,1.5) node {\small{}d}; 
+%  \draw ( 2.5,1.5) node {\small{}e}; 
+  
   \draw (-2.4, 5.5) node {$1$}; 
   \draw (-2.4, 4.5) node {$2$}; 
   \draw (-2.4, 3.5) node {$3$}; 
@@ -550,7 +557,173 @@
   \draw (-2.4, 0.5) node {$6$}; 
   \end{tikzpicture}
   \end{center}
-\end{figure}
+%%\end{figure}
+
+
+\noindent
+The last row contains the information about all words and their
+corresponding non-terminals. For example the field for \texttt{trains}
+contains the information $\meta{N}$ and $\meta{V}$ because it can be a
+``verb'' and a ``noun'' according to the grammar.  The row above,
+let's call the corresponding fields 5a to 5e, contains information
+about 2-word parts of the sentence, namely
+
+\begin{center}
+\begin{tabular}{llll}
+  5a) & $\underbrace{\texttt{The}}_{A}$ $\mid$ $\underbrace{\texttt{trainer}}_{N}$   \\
+  5b) & $\underbrace{\texttt{trainer}}_{N}$ $\mid$ $\underbrace{\texttt{trains}}_{N,V}$\\
+  5c) & \texttt{trains} $\mid$ \texttt{the}    \\
+  5d) & \texttt{the} $\mid$ \texttt{student}   \\
+  5e) & \texttt{student} $\mid$ \texttt{team}  \\
+\end{tabular}  
+\end{center}  
+
+\noindent
+For each of them, we look up in row 6 which non-terminals it belongs to
+(indicated above for 5a and 5b). For 5a, with the non-terminals
+\meta{A} and \meta{N}, we find the grammar rule
+
+\begin{plstx}[margin=1cm]
+  : \meta{N} ::= \meta{A}\cdot \meta{N}\\
+\end{plstx}
+
+\noindent
+which means into field 5a we put the left-hand side of this rule,
+which in this case is the non-terminal \meta{N}. For 5b we have to check
+for both $\meta{N}\cdot\meta{N}$ and $\meta{N}\cdot\meta{V}$ whether there
+is a right-hand side of this form in the grammar. But only the
+grammar rule 
+
+\begin{plstx}[margin=1cm]
+  : \meta{N} ::= \meta{N}\cdot \meta{N}\\
+\end{plstx}
+
+\noindent
+matches, which means 5b gets also an \meta{N}. Continuing for all
+fields in row 5 gives:
+
+\begin{center}
+  \begin{tikzpicture}[scale=0.7,line width=0.8mm]
+  \draw (-2,0) -- (4,0);
+  \draw (-2,1) -- (4,1);
+  \draw (-2,2) -- (3,2);
+  \draw (-2,3) -- (2,3);
+  \draw (-2,4) -- (1,4);
+  \draw (-2,5) -- (0,5);
+  \draw (-2,6) -- (-1,6);
+  
+  \draw (0,0) -- (0, 5);
+  \draw (1,0) -- (1, 4);
+  \draw (2,0) -- (2, 3);
+  \draw (3,0) -- (3, 2);
+  \draw (4,0) -- (4, 1);
+  \draw (-1,0) -- (-1, 6);
+  \draw (-2,0) -- (-2, 6);
+  
+  \draw (-1.5,-0.5) node {\footnotesize{}\texttt{The}}; 
+  \draw (-0.5,-1.0) node {\footnotesize{}\texttt{trainer}}; 
+  \draw ( 0.5,-0.5) node {\footnotesize{}\texttt{trains}}; 
+  \draw ( 1.5,-1.0) node {\footnotesize{}\texttt{the}}; 
+  \draw ( 2.5,-0.5) node {\footnotesize{}\texttt{student}}; 
+  \draw ( 3.5,-1.0) node {\footnotesize{}\texttt{team}};
+  
+  \draw (-1.5,0.5) node {$A$}; 
+  \draw (-0.5,0.5) node {$N$}; 
+  \draw ( 0.5,0.5) node {$N,V$}; 
+  \draw ( 1.5,0.5) node {$A$}; 
+  \draw ( 2.5,0.5) node {$N$}; 
+  \draw ( 3.5,0.5) node {$N,V$};
+
+  \draw (-1.5,1.5) node {$N$}; 
+  \draw (-0.5,1.5) node {$N$}; 
+  \draw ( 0.5,1.5) node {$$}; 
+  \draw ( 1.5,1.5) node {$N$}; 
+  \draw ( 2.5,1.5) node {$N$}; 
+
+
+%  \draw (-1.5,1.5) node {\small{}a}; 
+%  \draw (-0.5,1.5) node {\small{}b}; 
+%  \draw ( 0.5,1.5) node {\small{}c}; 
+%  \draw ( 1.5,1.5) node {\small{}d}; 
+%  \draw ( 2.5,1.5) node {\small{}e}; 
+  
+  \draw (-2.4, 5.5) node {$1$}; 
+  \draw (-2.4, 4.5) node {$2$}; 
+  \draw (-2.4, 3.5) node {$3$}; 
+  \draw (-2.4, 2.5) node {$4$}; 
+  \draw (-2.4, 1.5) node {$5$}; 
+  \draw (-2.4, 0.5) node {$6$}; 
+  \end{tikzpicture}
+  \end{center}
+
+\noindent
+Now row 4 is in charge of all 3-word parts of the sentence, namely
+
+\begin{center}
+\begin{tabular}{llll}
+  4a) & The $\mid$ trainer trains\\
+      & The trainer $\mid$ trains\\
+  4b) & trainer $\mid$ trains the\\
+      & trainer trains $\mid$ the\\
+  4c) & trains $\mid$ the student\\
+      & trains the $\mid$ student\\
+  4d) & the $\mid$ student team\\
+      & the student $\mid$ team\\
+\end{tabular}  
+\end{center}
+
+\noindent
+Note that in case of 3-word parts we have two splits. For example for
+4a: $\underbrace{\texttt{The}}_{A}$ and
+$\underbrace{\texttt{trainer trains}}_{N}$; and also
+$\underbrace{\texttt{The trainer}}_{N}$ and
+$\underbrace{\texttt{trains}}_{N,V}$. For each of these splits we have
+to look up in the rows below, which non-terminals we already
+computed. This allows us to look for right-hand sides in our grammar:
+$\meta{A}\cdot\meta{N}$, $\meta{N}\cdot\meta{N}$ and
+$\meta{N}\cdot\meta{V}$, which yield the only left-hand side
+\meta{N}. This is what we fill in for 4a. And so on for row 4.
+
+Row 3 is about all 4-word parts in the sentence, namely
+
+\begin{center}
+\begin{tabular}{llll}
+  3a) & The trainer trains the\\
+  3b) & trainer trains the student\\
+  3c) & trains the student team\\
+\end{tabular}  
+\end{center}
+
+\noindent
+Each of them can be split up in 3 ways, for example
+
+\begin{center}
+\begin{tabular}{llll}
+  3a) & The $\mid$ trainer trains the\\
+      & The trainer $\mid$ trains the\\
+      & The trainer trains $\mid$ the\\
+\end{tabular}  
+\end{center}
+
+\noindent
+and we have to consider them all in turn to fill in the non-terminals for
+3a. You can guess how it continues: row 2 is for all 5-word parts, and finally
+the field on the top is for the whole sentence.
+The idea of the CYK algorithm is that if in the top-field the starting
+symbol \meta{S} appears (possibly together with other non-terminals),
+then the sentence is accepted by the grammar. If it does not, then the
+sentence is not accepted.
+
+Let us very quickly calculate the complexity of the CYK
+algorithm. Lookup operations inside the triangle and in the grammar
+are assumed to be of constant time, $O(1)$, meaning they do not
+matter. How many fields are in the triangle\ldots
+$\frac{n}{2} * (n+1)$, where $n$ is the size of the input. That means
+roughly $O(n^2)$ fields. How much work do we have to do for each
+field? Well, for the top-most we have to consider $n-1$ splits, which
+means roughly $O(n)$ for each field. The overall result is a $O(n^3)$
+time-complexity for CYK. It turns out that this is the best worst-time
+complexity for parsing with context-free grammars.
 
 \end{document}
 
Binary file slides/slides05.pdf has changed
--- a/slides/slides05.tex	Thu Oct 29 00:25:20 2020 +0000
+++ b/slides/slides05.tex	Fri Oct 30 01:45:03 2020 +0000
@@ -397,7 +397,7 @@
   child {node {$\meta{E}$}
     child[sibling distance=10mm] {node {$\meta{T}$}
       child {node {$\meta{F}$} child {node {2}}}
-      child {node {+}}
+      child {node {*}}
       child {node {$\meta{T}$} child {node {$\meta{F}$} child {node {3}}}}
     }
     child {node {+}}
@@ -475,6 +475,106 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
+\frametitle{CYK Algorithm}
+
+Suppose the grammar:
+
+\begin{center}
+\bl{\begin{tabular}{@ {}lcl@ {}}
+$\meta{S}$ & $::=$ &  $\meta{N}\cdot \meta{P}$ \\
+$\meta{P}$ & $::=$ &  $\meta{V}\cdot \meta{N}$ \\
+$\meta{N}$ & $::=$ &  $\meta{N}\cdot \meta{N}$ \\
+$\meta{N}$ & $::=$ &  $\texttt{students} \;|\; \texttt{Jeff} \;|\; \texttt{geometry} \;|\; \texttt{trains} $ \\
+$\meta{V}$ & $::=$ &  $\texttt{trains}$ 
+\end{tabular}}
+\end{center}
+
+\bl{\texttt{Jeff trains geometry students}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{CYK Algorithm}
+
+\begin{center}
+  \begin{tikzpicture}[scale=1,line width=0.8mm]
+  \draw (-2,0) -- (2,0);
+  \draw (-2,1) -- (2,1);
+  \draw (-2,2) -- (1,2);
+  \draw (-2,3) -- (0,3);
+  \draw (-2,4) -- (-1,4);
+  
+  \draw (0,0) -- (0, 3);
+  \draw (1,0) -- (1, 2);
+  \draw (2,0) -- (2, 1);
+  \draw (-1,0) -- (-1, 4);
+  \draw (-2,0) -- (-2, 4);
+  
+  \draw (-1.5,-0.5) node {\footnotesize{}\texttt{Jeff}}; 
+  \draw (-0.5,-1.0) node {\footnotesize{}\texttt{trains}}; 
+  \draw ( 0.5,-0.5) node {\footnotesize{}\texttt{geometry}}; 
+  \draw ( 1.5,-1.0) node {\footnotesize{}\texttt{students}}; 
+  
+  \draw (-1.5,0.5) node {$N$}; 
+  \draw (-0.5,0.5) node {$N,V$}; 
+  \draw ( 0.5,0.5) node {$N$}; 
+  \draw ( 1.5,0.5) node {$N$}; 
+
+  \draw (-2.4, 3.5) node {$1$}; 
+  \draw (-2.4, 2.5) node {$2$}; 
+  \draw (-2.4, 1.5) node {$3$}; 
+  \draw (-2.4, 0.5) node {$4$}; 
+  \end{tikzpicture}
+  \end{center}
+
+\begin{textblock}{5}(10,10)
+\small\bl{\begin{tabular}{@ {}lcl@ {}}
+$\meta{S}$ & $::=$ &  $\meta{N}\cdot \meta{P}$ \\
+$\meta{P}$ & $::=$ &  $\meta{V}\cdot \meta{N}$ \\
+$\meta{N}$ & $::=$ &  $\meta{N}\cdot \meta{N}$ \\
+            $\meta{N}$ & $::=$ &  $\texttt{students} \;|\; \texttt{Jeff}$\\
+            & & $\;|\; \texttt{geometry} \;|\; \texttt{trains} $ \\
+$\meta{V}$ & $::=$ &  $\texttt{trains}$ 
+\end{tabular}}  
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Chomsky Normal Form}
+
+A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:
+
+\bl{\begin{plstx}[margin=0cm]
+: \meta{S} ::=  a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a\cdot a | b\cdot b | a | b \\
+\end{plstx}}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{CYK Algorithm}
+
+
+\begin{itemize}
+\item fastest possible algorithm for recognition problem
+\item runtime is \bl{$O(n^3)$}\bigskip
+\item grammars need to be transformed into CNF
+\end{itemize}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
 \frametitle{Context Sensitive Grammars}
 
 It is much harder to find out whether a string is parsed
@@ -919,6 +1019,15 @@
 \end{frame}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
+\begin{frame}[t,fragile]
+\begin{mybox3}{}
+  For CW2, please include '$\backslash$' as a symbol in strings, because
+  the collatz program contains
+  \begin{lstlisting}[language=Scala, numbers=none]
+  write "\n";\end{lstlisting}  
+\end{mybox3}
+\end{frame}
+
 \begin{frame}[t]
 \begin{mybox3}{}
   val (r1s, f1s) = simp(r1)\\
--- a/style.sty	Thu Oct 29 00:25:20 2020 +0000
+++ b/style.sty	Fri Oct 30 01:45:03 2020 +0000
@@ -5,7 +5,7 @@
 \setmainfont[Ligatures=TeX]{Palatino Linotype}
 \usepackage{amssymb}
 \usepackage{amsmath}
-\usepackage{menukeys}
+%\usepackage{menukeys}
 \definecolor{darkblue}{rgb}{0,0,0.6}
 \usepackage[colorlinks=true,urlcolor=darkblue,linkcolor=darkblue]{hyperref}
 \usepackage{soul}
@@ -24,31 +24,7 @@
 \newcommand{\inj}{\textit{inj}}
 \newcommand{\nullable}{\textit{nullable}}
 
-%%% for trees
-%% http://anorien.csc.warwick.ac.uk/mirrors/CTAN/graphics/pgf/contrib/forest/forest.pdf
 
-\newcommand\grid[1]{%
-\begin{tikzpicture}[baseline=(char.base)]
-  \path[use as bounding box]
-    (0,0) rectangle (1em,1em);
-  \draw[red!50, fill=red!20]
-    (0,0) rectangle (1em,1em);
-  \node[inner sep=1pt,anchor=base west]
-    (char) at (0em,\gridraiseamount) {#1};
-\end{tikzpicture}}
-\newcommand\gridraiseamount{0.12em}
-
-\makeatletter
-\newcommand\Grid[1]{%
-  \@tfor\z:=#1\do{\grid{\z}}}
-\makeatother	
-
-\newcommand\Vspace[1][.3em]{%
-  \mbox{\kern.06em\vrule height.3ex}%
-  \vbox{\hrule width#1}%
-  \hbox{\vrule height.3ex}}
-
-\def\VS{\Vspace[0.6em]}
 
 
 %%% url pointers