thys2/Paper/document/root.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Wed, 02 Feb 2022 14:52:41 +0000
changeset 405 3cfea5bb5e23
parent 402 1612f2a77bf6
child 416 57182b36ec01
permissions -rwxr-xr-x
updated some of the text and cardinality proof

\documentclass[runningheads]{lipics-v2021}
\usepackage{times}
\usepackage{isabelle}
\usepackage{isabellesym}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathpartir}
\usepackage{tikz}
\usepackage{pgf}
\usetikzlibrary{positioning}
%\usepackage{pdfsetup}
%\usepackage{stmaryrd}
%\usepackage{url}
%\usepackage{color}
%\usepackage[safe]{tipa}
%\usepackage[sc]{mathpazo}
%\usepackage{fontspec}
%\setmainfont[Ligatures=TeX]{Palatino Linotype}




\urlstyle{rm}
\isabellestyle{it}
\renewcommand{\isastyleminor}{\it}% 
\renewcommand{\isastyle}{\normalsize\it}%


\def\dn{\,\stackrel{\mbox{\scriptsize def}}{=}\,}
\renewcommand{\isasymequiv}{$\dn$}
\renewcommand{\isasymemptyset}{$\varnothing$}
\renewcommand{\isacharunderscore}{\mbox{$\_\!\_$}}
\renewcommand{\isasymiota}{\makebox[0mm]{${}^{\prime}$}}
\renewcommand{\isasymin}{\ensuremath{\,\in\,}}


\def\Brz{Brzozowski}
\def\der{\backslash}
\newtheorem{falsehood}{Falsehood}
\newtheorem{conject}{Conjecture}

\bibliographystyle{plainurl}

\title{{POSIX} {L}exing with {B}itcoded {D}erivatives}
\titlerunning{POSIX Lexing with Bitcoded Derivatives}
\author{Chengsong Tan}{King's College London}{chengsong.tan@kcl.ac.uk}{}{}
\author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{}
\authorrunning{C.~Tan and C.~Urban}
\keywords{POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL}
\category{}
\ccsdesc[100]{Design and analysis of algorithms}
\ccsdesc[100]{Formal languages and automata theory}
\Copyright{\mbox{}}
\nolinenumbers


\begin{document}
\maketitle

\begin{abstract}
  Sulzmann and Lu described a lexing algorithm that calculates
  Brzozowski derivatives using bitcodes annotated to regular
  expressions.  Their algorithm generates POSIX values which encode
  the information of \emph{how} a regular expression matches a
  string---that is, which part of the string is matched by which part
  of the regular expression.  The purpose of the bitcodes is to generate POSIX values incrementally while
  derivatives are calculated. They also help with designing
  an ``aggressive'' simplification function that keeps the size of
  derivatives small. Without simplification the size derivatives can grow
  exponentially resulting in an extremely slow lexing algorithm.  In this
  paper we describe a variant of Sulzmann and Lu's algorithm: Our
  algorithm is a recursive functional program, whereas Sulzmann
  and Lu's version involves a fixpoint construction. We \textit{(i)}
  prove in Isabelle/HOL that our program is correct and generates
  unique POSIX values; we also \textit{(ii)} establish a polynomial
  bound for the size of the derivatives. The size can be seen as a
  proxy measure for the efficiency of the lexing algorithm: because of
  the polynomial bound our algorithm does not suffer from
  the exponential blowup in earlier works.
  
  % Brzozowski introduced the notion of derivatives for regular
  % expressions. They can be used for a very simple regular expression
  % matching algorithm.  Sulzmann and Lu cleverly extended this
  % algorithm in order to deal with POSIX matching, which is the
  % underlying disambiguation strategy for regular expressions needed
  % in lexers.  Their algorithm generates POSIX values which encode
  % the information of \emph{how} a regular expression matches a
  % string---that is, which part of the string is matched by which
  % part of the regular expression.  In this paper we give our
  % inductive definition of what a POSIX value is and show $(i)$ that
  % such a value is unique (for given regular expression and string
  % being matched) and $(ii)$ that Sulzmann and Lu's algorithm always
  % generates such a value (provided that the regular expression
  % matches the string). We show that $(iii)$ our inductive definition
  % of a POSIX value is equivalent to an alternative definition by
  % Okui and Suzuki which identifies POSIX values as least elements
  % according to an ordering of values.  We also prove the correctness
  % of Sulzmann's bitcoded version of the POSIX matching algorithm and
  % extend the results to additional constructors for regular
  % expressions.  \smallskip
\end{abstract}



\input{session}



\end{document}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: