twtsevms/twtsevms.tex
author Chengsong
Wed, 04 Mar 2020 13:25:52 +0000
changeset 147 dfcf3fa58d7f
parent 146 676440e0a233
child 148 c8ef391dd6f7
permissions -rw-r--r--
nteresting

\documentclass[a4paper,UKenglish]{lipics}
\usepackage{graphic}
\usepackage{data}
\usepackage{tikz-cd}
\usepackage{tikz}

%\usetikzlibrary{graphs}
%\usetikzlibrary{graphdrawing}
%\usegdlibrary{trees}

%\usepackage{algorithm}
\usepackage{amsmath}
\usepackage{xcolor}
\usepackage[noend]{algpseudocode}
\usepackage{enumitem}
\usepackage{nccmath}
\usepackage{soul}

\definecolor{darkblue}{rgb}{0,0,0.6}
\hypersetup{colorlinks=true,allcolors=darkblue}
\newcommand{\comment}[1]%
{{\color{red}$\Rightarrow$}\marginpar{\raggedright\small{\bf\color{red}#1}}}

% \documentclass{article}
%\usepackage[utf8]{inputenc}
%\usepackage[english]{babel}
%\usepackage{listings}
% \usepackage{amsthm}
%\usepackage{hyperref}
% \usepackage[margin=0.5in]{geometry}
%\usepackage{pmboxdraw}
 
\title{POSIX Regular Expression Matching and Lexing}
\author{Chengsong Tan}
\affil{King's College London\\
London, UK\\
\texttt{chengsong.tan@kcl.ac.uk}}
\authorrunning{Chengsong Tan}
\Copyright{Chengsong Tan}

\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
\newcommand{\ZERO}{\mbox{\bf 0}}
\newcommand{\ONE}{\mbox{\bf 1}}
\def\erase{\textit{erase}}
\def\bders{\textit{bders}}
\def\lexer{\mathit{lexer}}
\def\blexer{\textit{blexer}}
\def\fuse{\textit{fuse}}
\def\flatten{\textit{flatten}}
\def\map{\textit{map}}
\def\blexers{\mathit{blexer\_simp}}
\def\simp{\mathit{simp}}
\def\mkeps{\mathit{mkeps}}
\def\bmkeps{\textit{bmkeps}}
\def\inj{\mathit{inj}}
\def\Empty{\mathit{Empty}}
\def\Left{\mathit{Left}}
\def\Right{\mathit{Right}}
\def\Stars{\mathit{Stars}}
\def\Char{\mathit{Char}}
\def\Seq{\mathit{Seq}}
\def\Der{\mathit{Der}}
\def\nullable{\mathit{nullable}}
\def\Z{\mathit{Z}}
\def\S{\mathit{S}}
\def\flex{\textit{flex}}
\def\rup{r^\uparrow}
\def\retrieve{\textit{retrieve}}
\def\AALTS{\textit{AALTS}}
\def\AONE{\textit{AONE}}
%\theoremstyle{theorem}
%\newtheorem{theorem}{Theorem}
%\theoremstyle{lemma}
%\newtheorem{lemma}{Lemma}
%\newcommand{\lemmaautorefname}{Lemma}
%\theoremstyle{definition}
%\newtheorem{definition}{Definition}
\algnewcommand\algorithmicswitch{\textbf{switch}}
\algnewcommand\algorithmiccase{\textbf{case}}
\algnewcommand\algorithmicassert{\texttt{assert}}
\algnewcommand\Assert[1]{\State \algorithmicassert(#1)}%
% New "environments"
\algdef{SE}[SWITCH]{Switch}{EndSwitch}[1]{\algorithmicswitch\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicswitch}%
\algdef{SE}[CASE]{Case}{EndCase}[1]{\algorithmiccase\ #1}{\algorithmicend\ \algorithmiccase}%
\algtext*{EndSwitch}%
\algtext*{EndCase}%


\begin{document}

\maketitle
Suppose (basic) regular expressions are given by the following grammar:

\[			r ::=   \ZERO \mid  \ONE
			 \mid  c  
			 \mid  r_1 \cdot r_2
			 \mid  r_1 + r_2   
			 \mid r^*         
\]


If we let the alphabet
where $c$ is selected from
be $\sum = \{0,1\}$,
then bitcodes can be defined in a 
regular expression style:


\[			 bs ::=   \ZERO \mid  \ONE
			 \mid  1
			 \mid  0  
			 \mid  bs_1 \cdot bs_2
			 \mid  \sum{bs_{list}}
			 \mid bs^*         
\]

We can define an isomorphism between the regex
definition of bitcodes and our list definition of bitcodes:
\begin{center}
		$b ::=   1 \mid  0 \qquad
bs ::= [] \mid b::bs    
$
\end{center}
For example we can let $\sigma([])= \ONE$.
But how to define such isomorphism in detail is not explicitly needed for now.

\emph{Annotated regular expressions} can be defined by the following
grammar using new $bs$ definition:

\begin{center}
\begin{tabular}{lcl}
  $\textit{a}$ & $::=$  & $\ZERO$\\
                  & $\mid$ & $_{bs}\ONE$\\
                  & $\mid$ & $_{bs}{\bf c}$\\
                  & $\mid$ & $_{bs}\sum\,as$\\
                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
                  & $\mid$ & $_{bs}a^*$
\end{tabular}    
\end{center}  
Let the set of all bitcoded regular expressions be $\textit{BS}$.
Let the set of all annotated regular expression be $\textit{AR}$.
Let us play with the function $f: \textit{AR} \rightarrow \textit{BS}$ on annotated regular expressions:
\begin{center}
$f(\ZERO) = \ZERO$\\
$f(_{bs}\ONE) = \textit{bs}$\\
$f(_{bs}a) = \textit{bs} $\\
$f(_{bs}r_1\cdot r_2) = \textit{bs} \cdot( f(r_1) \cdot f(r_2))$\\
$f(_{bs}\sum{rs}) = \textit{bs} \cdot \sum\limits_{r \in rs}{f(\textit{r})}$\\
$f(_{bs}r^*) = \textit{bs} \cdot((0 \cdot f(r))^*\cdot 1) $
\end{center}

We claim that:
\begin{center}
$f(a) = \{bs \mid a \gg bs\}$.
\end{center}

\bibliographystyle{plain}
\bibliography{root}


\end{document}