468
|
1 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
2 |
% Masters/Doctoral Thesis
|
|
3 |
% LaTeX Template
|
|
4 |
% Version 2.5 (27/8/17)
|
|
5 |
%
|
|
6 |
% This template was downloaded from:
|
|
7 |
% http://www.LaTeXTemplates.com
|
|
8 |
%
|
|
9 |
% Version 2.x major modifications by:
|
|
10 |
% Vel (vel@latextemplates.com)
|
|
11 |
%
|
|
12 |
% This template is based on a template by:
|
|
13 |
% Steve Gunn (http://users.ecs.soton.ac.uk/srg/softwaretools/document/templates/)
|
|
14 |
% Sunil Patel (http://www.sunilpatel.co.uk/thesis-template/)
|
|
15 |
%
|
|
16 |
% Template license:
|
|
17 |
% CC BY-NC-SA 3.0 (http://creativecommons.org/licenses/by-nc-sa/3.0/)
|
|
18 |
%
|
|
19 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
20 |
|
|
21 |
%----------------------------------------------------------------------------------------
|
|
22 |
% PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
|
|
23 |
%----------------------------------------------------------------------------------------
|
|
24 |
|
|
25 |
\documentclass[
|
|
26 |
11pt, % The default document font size, options: 10pt, 11pt, 12pt
|
|
27 |
%oneside, % Two side (alternating margins) for binding by default, uncomment to switch to one side
|
|
28 |
english, % ngerman for German
|
|
29 |
singlespacing, % Single line spacing, alternatives: onehalfspacing or doublespacing
|
|
30 |
%draft, % Uncomment to enable draft mode (no pictures, no links, overfull hboxes indicated)
|
|
31 |
%nolistspacing, % If the document is onehalfspacing or doublespacing, uncomment this to set spacing in lists to single
|
|
32 |
%liststotoc, % Uncomment to add the list of figures/tables/etc to the table of contents
|
|
33 |
%toctotoc, % Uncomment to add the main table of contents to the table of contents
|
|
34 |
%parskip, % Uncomment to add space between paragraphs
|
|
35 |
%nohyperref, % Uncomment to not load the hyperref package
|
|
36 |
headsepline, % Uncomment to get a line under the header
|
|
37 |
%chapterinoneline, % Uncomment to place the chapter title next to the number on one line
|
471
|
38 |
consistentlayout, % Uncomment to change the layout of the declaration, abstract and acknowledgements pages to match the default layout
|
468
|
39 |
]{MastersDoctoralThesis} % The class file specifying the document structure
|
|
40 |
|
|
41 |
\usepackage[utf8]{inputenc} % Required for inputting international characters
|
|
42 |
\usepackage[T1]{fontenc} % Output font encoding for international characters
|
505
|
43 |
%\usepackage{fdsymbol} % Loads unicode-math
|
468
|
44 |
|
|
45 |
\usepackage{mathpazo} % Use the Palatino font by default
|
|
46 |
\usepackage{hyperref}
|
|
47 |
\usepackage[backend=bibtex,style=authoryear,natbib=true]{biblatex} % Use the bibtex backend with the authoryear citation style (which resembles APA)
|
500
|
48 |
\usepackage{stmaryrd}
|
468
|
49 |
|
|
50 |
\addbibresource{example.bib} % The filename of the bibliography
|
|
51 |
|
|
52 |
\usepackage[autostyle=true]{csquotes} % Required to generate language-dependent quotes in the bibliography
|
|
53 |
|
|
54 |
%My Newly added Libraries in addition to template
|
|
55 |
\usepackage{graphic}
|
|
56 |
\usepackage{data}
|
|
57 |
|
|
58 |
%\usepackage{algorithm}
|
|
59 |
\usepackage{amsmath}
|
506
|
60 |
\usepackage{amsthm}
|
|
61 |
\usepackage{amssymb}
|
519
|
62 |
\usepackage{cleveref}
|
505
|
63 |
%\usepackage{mathtools}
|
468
|
64 |
\usepackage[noend]{algpseudocode}
|
|
65 |
\usepackage{enumitem}
|
|
66 |
\usepackage{nccmath}
|
|
67 |
\usepackage{tikz-cd}
|
|
68 |
\usepackage{tikz}
|
472
|
69 |
\usetikzlibrary{automata, positioning, calc}
|
468
|
70 |
|
|
71 |
%----------------------------------------------------------------------------------------
|
|
72 |
% MARGIN SETTINGS
|
|
73 |
%----------------------------------------------------------------------------------------
|
|
74 |
|
|
75 |
\geometry{
|
|
76 |
paper=a4paper, % Change to letterpaper for US letter
|
|
77 |
inner=2.5cm, % Inner margin
|
|
78 |
outer=3.8cm, % Outer margin
|
|
79 |
bindingoffset=.5cm, % Binding offset
|
|
80 |
top=1.5cm, % Top margin
|
|
81 |
bottom=1.5cm, % Bottom margin
|
|
82 |
%showframe, % Uncomment to show how the type block is set on the page
|
|
83 |
}
|
|
84 |
|
|
85 |
%----------------------------------------------------------------------------------------
|
|
86 |
% THESIS INFORMATION
|
|
87 |
%----------------------------------------------------------------------------------------
|
|
88 |
|
|
89 |
\thesistitle{POSIX Regular Expression Matching and Lexing} % Your thesis title, this is used in the title and abstract, print it elsewhere with \ttitle
|
|
90 |
\supervisor{Dr. Christian \textsc{Urban}} % Your supervisor's name, this is used in the title page, print it elsewhere with \supname
|
|
91 |
\examiner{} % Your examiner's name, this is not currently used anywhere in the template, print it elsewhere with \examname
|
|
92 |
\degree{Doctor of Philosophy} % Your degree name, this is used in the title page and abstract, print it elsewhere with \degreename
|
|
93 |
\author{Chengsong \textsc{Tan}} % Your name, this is used in the title page and abstract, print it elsewhere with \authorname
|
|
94 |
\addresses{} % Your address, this is not currently used anywhere in the template, print it elsewhere with \addressname
|
|
95 |
|
|
96 |
\subject{Computer Science} % Your subject area, this is not currently used anywhere in the template, print it elsewhere with \subjectname
|
|
97 |
\keywords{} % Keywords for your thesis, this is not currently used anywhere in the template, print it elsewhere with \keywordnames
|
|
98 |
\university{\href{https://www.kcl.ac.uk}{King's College London}} % Your university's name and URL, this is used in the title page and abstract, print it elsewhere with \univname
|
|
99 |
\department{\href{https://www.kcl.ac.uk/informatics}{Department or Informatics}} % Your department's name and URL, this is used in the title page and abstract, print it elsewhere with \deptname
|
471
|
100 |
\group{\href{https://www.kcl.ac.uk/research/ssy}{Software Systems Group}} % Your research group's name and URL, this is used in the title page, print it elsewhere with \groupname
|
468
|
101 |
\faculty{\href{http://faculty.university.com}{Chengsong Tan}} % Your faculty's name and URL, this is used in the title page and abstract, print it elsewhere with \facname
|
|
102 |
|
|
103 |
\AtBeginDocument{
|
|
104 |
\hypersetup{pdftitle=\ttitle} % Set the PDF's title to your title
|
|
105 |
\hypersetup{pdfauthor=\authorname} % Set the PDF's author to your name
|
|
106 |
\hypersetup{pdfkeywords=\keywordnames} % Set the PDF's keywords to your keywords
|
|
107 |
}
|
|
108 |
|
|
109 |
\begin{document}
|
|
110 |
|
|
111 |
\frontmatter % Use roman page numbering style (i, ii, iii, iv...) for the pre-content pages
|
|
112 |
|
|
113 |
\pagestyle{plain} % Default to the plain heading style until the thesis style is called for the body content
|
|
114 |
|
|
115 |
%----------------------------------------------------------------------------------------
|
|
116 |
% TITLE PAGE
|
|
117 |
%----------------------------------------------------------------------------------------
|
|
118 |
|
|
119 |
\begin{titlepage}
|
|
120 |
\begin{center}
|
|
121 |
|
|
122 |
\vspace*{.06\textheight}
|
|
123 |
{\scshape\LARGE \univname\par}\vspace{1.5cm} % University name
|
|
124 |
\textsc{\Large Doctoral Thesis}\\[0.5cm] % Thesis type
|
|
125 |
|
|
126 |
\HRule \\[0.4cm] % Horizontal line
|
|
127 |
{\huge \bfseries \ttitle\par}\vspace{0.4cm} % Thesis title
|
|
128 |
\HRule \\[1.5cm] % Horizontal line
|
|
129 |
|
|
130 |
\begin{minipage}[t]{0.4\textwidth}
|
|
131 |
\begin{flushleft} \large
|
|
132 |
\emph{Author:}\\
|
|
133 |
\href{https://kclpure.kcl.ac.uk/portal/en/persons/chengsong-tan(a63b381b-04bc-4cd7-beea-beb3e96cb153).html}{\authorname} % Author name - remove the \href bracket to remove the link
|
|
134 |
\end{flushleft}
|
|
135 |
\end{minipage}
|
|
136 |
|
|
137 |
|
|
138 |
\begin{minipage}[t]{0.4\textwidth}
|
|
139 |
\begin{flushright} \large
|
|
140 |
\emph{Supervisor:} \\
|
|
141 |
\href{https://www.kcl.ac.uk/people/christian-urban}{\supname} % Supervisor name - remove the \href bracket to remove the link
|
|
142 |
\end{flushright}
|
|
143 |
\end{minipage}\\[3cm]
|
|
144 |
|
|
145 |
\vfill
|
|
146 |
|
|
147 |
\large \textit{A thesis submitted in fulfillment of the requirements\\ for the degree of \degreename}\\[0.3cm] % University requirement text
|
|
148 |
\textit{in the}\\[0.4cm]
|
|
149 |
\groupname\\\deptname\\[2cm] % Research group name and department name
|
|
150 |
|
|
151 |
\vfill
|
|
152 |
|
|
153 |
{\large \today}\\[4cm] % Date
|
|
154 |
%\includegraphics{Logo} % University/department logo - uncomment to place it
|
|
155 |
|
|
156 |
\vfill
|
|
157 |
\end{center}
|
|
158 |
\end{titlepage}
|
|
159 |
|
|
160 |
%----------------------------------------------------------------------------------------
|
|
161 |
% DECLARATION PAGE
|
|
162 |
%----------------------------------------------------------------------------------------
|
|
163 |
|
|
164 |
\begin{declaration}
|
|
165 |
\addchaptertocentry{\authorshipname} % Add the declaration to the table of contents
|
|
166 |
\noindent I, \authorname, declare that this thesis titled, \enquote{\ttitle} and the work presented in it are my own. I confirm that:
|
|
167 |
|
|
168 |
\begin{itemize}
|
|
169 |
\item This work was done wholly or mainly while in candidature for a research degree at this University.
|
|
170 |
\item Where any part of this thesis has previously been submitted for a degree or any other qualification at this University or any other institution, this has been clearly stated.
|
|
171 |
\item Where I have consulted the published work of others, this is always clearly attributed.
|
|
172 |
\item Where I have quoted from the work of others, the source is always given. With the exception of such quotations, this thesis is entirely my own work.
|
|
173 |
\item I have acknowledged all main sources of help.
|
|
174 |
\item Where the thesis is based on work done by myself jointly with others, I have made clear exactly what was done by others and what I have contributed myself.\\
|
|
175 |
\end{itemize}
|
|
176 |
|
|
177 |
\noindent Signed:\\
|
|
178 |
\rule[0.5em]{25em}{0.5pt} % This prints a line for the signature
|
|
179 |
|
|
180 |
\noindent Date:\\
|
|
181 |
\rule[0.5em]{25em}{0.5pt} % This prints a line to write the date
|
|
182 |
\end{declaration}
|
|
183 |
|
|
184 |
\cleardoublepage
|
|
185 |
|
|
186 |
%----------------------------------------------------------------------------------------
|
|
187 |
% QUOTATION PAGE
|
|
188 |
%----------------------------------------------------------------------------------------
|
|
189 |
|
|
190 |
\vspace*{0.2\textheight}
|
|
191 |
|
|
192 |
\noindent\enquote{\itshape Thanks to my solid academic training, today I can write hundreds of words on virtually any topic without possessing a shred of information, which is how I got a good job in journalism.}\bigbreak
|
|
193 |
|
|
194 |
\hfill Dave Barry
|
|
195 |
|
|
196 |
%----------------------------------------------------------------------------------------
|
|
197 |
% ABSTRACT PAGE
|
|
198 |
%----------------------------------------------------------------------------------------
|
|
199 |
|
519
|
200 |
%\begin{abstract}
|
|
201 |
|
|
202 |
|
|
203 |
%\end{abstract}
|
|
204 |
|
|
205 |
|
527
|
206 |
\begin{abstract}
|
468
|
207 |
\addchaptertocentry{\abstractname} % Add the abstract to the table of contents
|
527
|
208 |
%\addchap{Abstract}
|
471
|
209 |
This work is about regular expressions and derivatives. It combines functional algorithms and their formal verification in the Isabelle/HOL theorem prover.
|
468
|
210 |
|
|
211 |
Theoretical results say that regular expression matching
|
|
212 |
should be linear with respect to the input.
|
|
213 |
Under a certain class of regular expressions and inputs though,
|
526
|
214 |
practical implementations often suffer from non-linear or even
|
468
|
215 |
exponential running time,
|
526
|
216 |
allowing ReDoS (regular expression denial-of-service ) attacks.
|
|
217 |
The reason behind this is that regex libraries in popular programming
|
|
218 |
languages often want to support richer constructs
|
|
219 |
than the usual basic regular expressions. Unfortunately, this means that even simple cases
|
|
220 |
are "infected" with atrocious running time.
|
|
221 |
|
468
|
222 |
|
526
|
223 |
This work aims to address the above vulnerability by a combination
|
468
|
224 |
of Brzozowski's derivatives and interactive theorem proving. We give an
|
|
225 |
improved version of Sulzmann and Lu's bit-coded algorithm using
|
526
|
226 |
derivatives, and give a formal guarantee in terms of correctness and
|
|
227 |
size of derivatives, which we formalised in Isabelle/HOL.
|
|
228 |
Then we improve the algorithm with a stronger version of
|
468
|
229 |
simplification, and prove a time bound linear to input and
|
526
|
230 |
cubic to regular expression size using a technique introduced by
|
468
|
231 |
Antimirov.
|
|
232 |
The result is an algorithm with provable guarantees on
|
535
|
233 |
correctness and finiteness. We believe this is the first
|
468
|
234 |
work with these two guarantees together.
|
|
235 |
|
527
|
236 |
\end{abstract}
|
519
|
237 |
|
468
|
238 |
|
|
239 |
%----------------------------------------------------------------------------------------
|
|
240 |
% ACKNOWLEDGEMENTS
|
|
241 |
%----------------------------------------------------------------------------------------
|
|
242 |
|
|
243 |
\begin{acknowledgements}
|
|
244 |
\addchaptertocentry{\acknowledgementname} % Add the acknowledgements to the table of contents
|
|
245 |
The acknowledgments and the people to thank go here, don't forget to include your project advisor\ldots
|
|
246 |
\end{acknowledgements}
|
|
247 |
|
|
248 |
%----------------------------------------------------------------------------------------
|
|
249 |
% LIST OF CONTENTS/FIGURES/TABLES PAGES
|
|
250 |
%----------------------------------------------------------------------------------------
|
|
251 |
|
|
252 |
\tableofcontents % Prints the main table of contents
|
|
253 |
|
518
|
254 |
%\listoffigures % Prints the list of figures
|
468
|
255 |
|
518
|
256 |
%\listoftables % Prints the list of tables
|
468
|
257 |
|
|
258 |
%----------------------------------------------------------------------------------------
|
|
259 |
% ABBREVIATIONS
|
|
260 |
%----------------------------------------------------------------------------------------
|
|
261 |
|
|
262 |
\begin{abbreviations}{ll} % Include a list of abbreviations (a table of two columns)
|
|
263 |
|
500
|
264 |
|
|
265 |
\newtheorem{theorem}{Theorem}
|
|
266 |
\newtheorem{lemma}{Lemma}
|
528
|
267 |
\newtheorem{definition}{Definition}
|
505
|
268 |
%proof
|
|
269 |
|
500
|
270 |
|
503
|
271 |
\newcommand\sflat[1][]{\textit{sflat} \, #1}
|
|
272 |
\newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3}
|
|
273 |
\newcommand{\bderssimp}[2]{#1 \backslash_{bsimp} #2}
|
|
274 |
\newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2}
|
|
275 |
\newcommand{\sflataux}[1]{\lbracket #1 \rbracket}
|
468
|
276 |
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
|
|
277 |
\newcommand{\ZERO}{\mbox{\bf 0}}
|
|
278 |
\newcommand{\ONE}{\mbox{\bf 1}}
|
503
|
279 |
\newcommand{\AALTS}[2]{\XOR {\scriptstyle #1}\, #2}
|
|
280 |
|
468
|
281 |
\def\lexer{\mathit{lexer}}
|
|
282 |
\def\mkeps{\mathit{mkeps}}
|
|
283 |
|
500
|
284 |
\def\AZERO{\textit{AZERO}}
|
|
285 |
\def\AONE{\textit{AONE}}
|
|
286 |
\def\ACHAR{\textit{ACHAR}}
|
503
|
287 |
|
|
288 |
|
|
289 |
\def\ALTS{\textit{ALTS}}
|
500
|
290 |
\def\ASTAR{\textit{ASTAR}}
|
468
|
291 |
\def\DFA{\textit{DFA}}
|
|
292 |
\def\bmkeps{\textit{bmkeps}}
|
|
293 |
\def\retrieve{\textit{retrieve}}
|
|
294 |
\def\blexer{\textit{blexer}}
|
|
295 |
\def\flex{\textit{flex}}
|
|
296 |
\def\inj{\mathit{inj}}
|
|
297 |
\def\Empty{\mathit{Empty}}
|
|
298 |
\def\Left{\mathit{Left}}
|
|
299 |
\def\Right{\mathit{Right}}
|
|
300 |
\def\Stars{\mathit{Stars}}
|
|
301 |
\def\Char{\mathit{Char}}
|
|
302 |
\def\Seq{\mathit{Seq}}
|
|
303 |
\def\Der{\mathit{Der}}
|
|
304 |
\def\nullable{\mathit{nullable}}
|
|
305 |
\def\Z{\mathit{Z}}
|
|
306 |
\def\S{\mathit{S}}
|
|
307 |
\def\rup{r^\uparrow}
|
500
|
308 |
%\def\bderssimp{\mathit{bders}\_\mathit{simp}}
|
|
309 |
\def\distinctWith{\textit{distinctWith}}
|
|
310 |
|
503
|
311 |
\def\rexp{\mathbf{rexp}}
|
500
|
312 |
\def\simp{\mathit{simp}}
|
|
313 |
\def\simpALTs{\mathit{simp}\_\mathit{ALTs}}
|
|
314 |
\def\map{\mathit{map}}
|
|
315 |
\def\distinct{\mathit{distinct}}
|
|
316 |
\def\blexersimp{\mathit{blexer}\_\mathit{simp}}
|
|
317 |
\def\map{\textit{map}}
|
|
318 |
\def\vsuf{\textit{vsuf}}
|
|
319 |
\def\sflataux{\textit{sflat}\_\textit{aux}}
|
|
320 |
\def\rrexp{\textit{rrexp}}
|
|
321 |
\def\rsize{\textit{rsize}}
|
|
322 |
\def\asize{\textit{asize}}
|
|
323 |
\def\rerase{\textit{rerase}}
|
|
324 |
\def\erase{\textit{erase}}
|
|
325 |
\def\STAR{\textit{STAR}}
|
|
326 |
\def\flts{\textit{flts}}
|
468
|
327 |
|
503
|
328 |
|
|
329 |
|
468
|
330 |
\end{abbreviations}
|
|
331 |
|
|
332 |
|
|
333 |
|
|
334 |
%----------------------------------------------------------------------------------------
|
|
335 |
% SYMBOLS
|
|
336 |
%----------------------------------------------------------------------------------------
|
|
337 |
|
528
|
338 |
%\begin{symbols}{lll} % Include a list of Symbols (a three column table)
|
468
|
339 |
|
528
|
340 |
%$a$ & distance & \si{\meter} \\
|
|
341 |
%$P$ & power & \si{\watt} (\si{\joule\per\second}) \\
|
468
|
342 |
%Symbol & Name & Unit \\
|
|
343 |
|
528
|
344 |
%\addlinespace % Gap to separate the Roman symbols from the Greek
|
468
|
345 |
|
528
|
346 |
%$\omega$ & angular frequency & \si{\radian} \\
|
468
|
347 |
|
528
|
348 |
%\end{symbols}
|
468
|
349 |
|
|
350 |
%----------------------------------------------------------------------------------------
|
|
351 |
% DEDICATION
|
|
352 |
%----------------------------------------------------------------------------------------
|
|
353 |
|
|
354 |
\dedicatory{For/Dedicated to/To my\ldots}
|
|
355 |
|
|
356 |
%----------------------------------------------------------------------------------------
|
|
357 |
% THESIS CONTENT - CHAPTERS
|
|
358 |
%----------------------------------------------------------------------------------------
|
|
359 |
|
|
360 |
\mainmatter % Begin numeric (1,2,3...) page numbering
|
|
361 |
|
|
362 |
\pagestyle{thesis} % Return the page headers back to the "thesis" style
|
|
363 |
|
|
364 |
% Include the chapters of the thesis as separate files from the Chapters folder
|
|
365 |
% Uncomment the lines as you write the chapters
|
|
366 |
|
532
|
367 |
\include{Chapters/Introduction}
|
|
368 |
\include{Chapters/Inj}
|
|
369 |
\include{Chapters/Bitcoded1}
|
|
370 |
\include{Chapters/Bitcoded2}
|
|
371 |
\include{Chapters/Finite}
|
|
372 |
\include{Chapters/Cubic}
|
|
373 |
\include{Chapters/Further}
|
468
|
374 |
|
|
375 |
%----------------------------------------------------------------------------------------
|
|
376 |
% THESIS CONTENT - APPENDICES
|
|
377 |
%----------------------------------------------------------------------------------------
|
|
378 |
|
|
379 |
\appendix % Cue to tell LaTeX that the following "chapters" are Appendices
|
|
380 |
|
|
381 |
% Include the appendices of the thesis as separate files from the Appendices folder
|
|
382 |
% Uncomment the lines as you write the Appendices
|
|
383 |
|
|
384 |
\include{Appendices/AppendixA}
|
|
385 |
%\include{Appendices/AppendixB}
|
|
386 |
%\include{Appendices/AppendixC}
|
|
387 |
|
|
388 |
%----------------------------------------------------------------------------------------
|
|
389 |
% BIBLIOGRAPHY
|
|
390 |
%----------------------------------------------------------------------------------------
|
|
391 |
|
|
392 |
\printbibliography[heading=bibintoc]
|
|
393 |
|
|
394 |
%----------------------------------------------------------------------------------------
|
|
395 |
|
|
396 |
\end{document}
|