\documentclass[a4paper,UKenglish]{lipics}
\usepackage{graphic}
\usepackage{data}
\usepackage{tikz-cd}
\usepackage{tikz}
%\usetikzlibrary{graphs}
%\usetikzlibrary{graphdrawing}
%\usegdlibrary{trees}
%\usepackage{algorithm}
\usepackage{amsmath}
\usepackage{xcolor}
\usepackage[noend]{algpseudocode}
\usepackage{enumitem}
\usepackage{nccmath}
\usepackage{soul}
\definecolor{darkblue}{rgb}{0,0,0.6}
\hypersetup{colorlinks=true,allcolors=darkblue}
% \comment{text}: reviewer annotation. Prints a red arrow at the current
% position in the running text and puts `text' in the margin, set ragged
% right in small bold red type.
% \small is a size switch (it takes no argument) and \bfseries replaces the
% obsolete \bf; both stay local to the \marginpar argument.
\newcommand{\comment}[1]{%
  {\color{red}$\Rightarrow$}%
  \marginpar{\raggedright\small\bfseries\color{red}#1}%
}
% \documentclass{article}
%\usepackage[utf8]{inputenc}
%\usepackage[english]{babel}
%\usepackage{listings}
% \usepackage{amsthm}
%\usepackage{hyperref}
% \usepackage[margin=0.5in]{geometry}
%\usepackage{pmboxdraw}
\title{POSIX Regular Expression Matching and Lexing}
\author{Chengsong Tan}
\affil{King's College London\\
London, UK\\
\texttt{chengsong.tan@kcl.ac.uk}}
\authorrunning{Chengsong Tan}
\Copyright{Chengsong Tan}
% ---- Notation macros ----------------------------------------------------
% NOTE(review): most macros below are created with \def, which replaces an
% existing definition without any warning (unlike \newcommand).
%
% "Defined as": the word "def" in script size stacked over an equals sign.
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
% Bold 0 and 1. The grammars in the body use these as regular-expression
% constants, alongside the plain bits 0 and 1.
\newcommand{\ZERO}{\mbox{\bf 0}}
\newcommand{\ONE}{\mbox{\bf 1}}
% Italic identifier macros. The \textit variants produce text italics; the
% \mathit variants can only be used inside math mode.
\def\erase{\textit{erase}}
\def\bders{\textit{bders}}
\def\lexer{\mathit{lexer}}
\def\blexer{\textit{blexer}}
\def\fuse{\textit{fuse}}
\def\flatten{\textit{flatten}}
\def\map{\textit{map}}
% Prints "blexer_simp" (the underscore is escaped as \_).
\def\blexers{\mathit{blexer\_simp}}
\def\simp{\mathit{simp}}
\def\mkeps{\mathit{mkeps}}
\def\bmkeps{\textit{bmkeps}}
\def\inj{\mathit{inj}}
\def\Empty{\mathit{Empty}}
\def\Left{\mathit{Left}}
\def\Right{\mathit{Right}}
\def\Stars{\mathit{Stars}}
\def\Char{\mathit{Char}}
\def\Seq{\mathit{Seq}}
\def\Der{\mathit{Der}}
\def\nullable{\mathit{nullable}}
\def\Z{\mathit{Z}}
% NOTE(review): \S is LaTeX's built-in section-sign command; this \def
% replaces it with an italic "S" for the rest of the document.
\def\S{\mathit{S}}
\def\flex{\textit{flex}}
% The letter r with an up-arrow superscript (math mode only).
\def\rup{r^\uparrow}
\def\retrieve{\textit{retrieve}}
\def\AALTS{\textit{AALTS}}
\def\AONE{\textit{AONE}}
%\theoremstyle{theorem}
%\newtheorem{theorem}{Theorem}
%\theoremstyle{lemma}
%\newtheorem{lemma}{Lemma}
%\newcommand{\lemmaautorefname}{Lemma}
%\theoremstyle{definition}
%\newtheorem{definition}{Definition}
% ---- switch/case/assert constructs for algpseudocode ---------------------
% Keyword texts used by the constructs defined below.
\algnewcommand\algorithmicswitch{\textbf{switch}}
\algnewcommand\algorithmiccase{\textbf{case}}
\algnewcommand\algorithmicassert{\texttt{assert}}
% \Assert{cond}: a statement line consisting of the assert keyword followed
% by the condition in parentheses.
\algnewcommand\Assert[1]{\State \algorithmicassert(#1)}%
% New "environments"
% \Switch{expr} ... \EndSwitch: the opening line is the switch keyword, the
% argument, then \algorithmicdo.
\algdef{SE}[SWITCH]{Switch}{EndSwitch}[1]{\algorithmicswitch\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicswitch}%
% \Case{label} ... \EndCase: the opening line is the case keyword followed by
% the argument.
\algdef{SE}[CASE]{Case}{EndCase}[1]{\algorithmiccase\ #1}{\algorithmicend\ \algorithmiccase}%
% The starred \algtext clears the closing text of both blocks, so no
% "end switch" / "end case" line is printed (see the algorithmicx manual).
\algtext*{EndSwitch}%
\algtext*{EndCase}%
\begin{document}
\maketitle
Suppose (basic) regular expressions are given by the following grammar:
\[ r ::= \ZERO \mid \ONE
\mid c
\mid r_1 \cdot r_2
\mid r_1 + r_2
\mid r^*
\]
If we let the alphabet
from which $c$ is drawn
be $\Sigma = \{0,1\}$,
then bitcodes can be defined in a
regular-expression style:
\[ bs ::= \ZERO \mid \ONE
\mid 1
\mid 0
\mid bs_1 \cdot bs_2
\mid \sum{bs_{list}}
\mid bs^*
\]
We can define an isomorphism between the regex
definition of bitcodes and our list definition of bitcodes:
\begin{center}
$b ::= 1 \mid 0 \qquad
bs ::= [] \mid b::bs
$
\end{center}
For example, writing $\sigma$ for this isomorphism, we can let $\sigma([])= \ONE$.
How to define the isomorphism in detail is not needed for now.
\emph{Annotated regular expressions} can be defined by the following
grammar, using the new definition of $bs$:
\begin{center}
\begin{tabular}{lcl}
$\textit{a}$ & $::=$ & $\ZERO$\\
& $\mid$ & $_{bs}\ONE$\\
& $\mid$ & $_{bs}{\bf c}$\\
& $\mid$ & $_{bs}\sum\,as$\\
& $\mid$ & $_{bs}a_1\cdot a_2$\\
& $\mid$ & $_{bs}a^*$
\end{tabular}
\end{center}
Let the set of all bitcoded regular expressions be $\textit{BS}$.
Let the set of all annotated regular expressions be $\textit{AR}$.
Let us consider the function $f\colon \textit{AR} \rightarrow \textit{BS}$ on annotated regular expressions:
\begin{center}
$f(\ZERO) = \ZERO$\\
$f(_{bs}\ONE) = \textit{bs}$\\
$f(_{bs}{\bf c}) = \textit{bs} $\\
$f(_{bs}a_1\cdot a_2) = \textit{bs} \cdot( f(a_1) \cdot f(a_2))$\\
$f(_{bs}\sum{as}) = \textit{bs} \cdot \sum\limits_{a \in as}{f(\textit{a})}$\\
$f(_{bs}a^*) = \textit{bs} \cdot((0 \cdot f(a))^*\cdot 1) $
\end{center}
We claim that:
\begin{center}
$f(a) = \{bs \mid a \gg bs\}$.
\end{center}
\bibliographystyle{plain}
\bibliography{root}
\end{document}