twtsevms/twtsevms.tex
author Chengsong
Fri, 10 Apr 2020 11:58:11 +0100
changeset 148 c8ef391dd6f7
parent 147 dfcf3fa58d7f
permissions -rw-r--r--
vunsimp

\documentclass[a4paper,UKenglish]{lipics}
\usepackage{data}
\usepackage{graphic}
\usepackage{tikz-cd}
\usepackage{tikz}

%\usetikzlibrary{graphs}
%\usetikzlibrary{graphdrawing}
%\usegdlibrary{trees}
%\usepackage{algorithm}
\usepackage{amsmath}
\usepackage{xcolor}
\usepackage[noend]{algpseudocode}
\usepackage{enumitem}
\usepackage{nccmath}
\usepackage{soul}
%works?
\definecolor{darkblue}{rgb}{0,0,0.6}
\hypersetup{colorlinks=true,allcolors=darkblue}
\newcommand{\comment}[1]%
{{\color{red}$\Rightarrow$}\marginpar{\raggedright\small{\bf\color{red}#1}}}

% \documentclass{article}
%\usepackage[utf8]{inputenc}
%\usepackage[english]{babel}
%\usepackage{listings}
% \usepackage{amsthm}
%\usepackage{hyperref}
% \usepackage[margin=0.5in]{geometry}
%\usepackage{pmboxdraw}
 
\title{POSIX Regular Expression Matching and Lexing}
\author{Chengsong Tan}
\affil{King's College London\\
London, UK\\
\texttt{chengsong.tan@kcl.ac.uk}}
\authorrunning{Chengsong Tan}
\Copyright{Chengsong Tan}

\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
\newcommand{\ZERO}{\mbox{\bf 0}}
\newcommand{\ONE}{\mbox{\bf 1}}
\def\erase{\textit{erase}}
\def\bders{\textit{bders}}
\def\lexer{\mathit{lexer}}
\def\blexer{\textit{blexer}}
\def\fuse{\textit{fuse}}
\def\flatten{\textit{flatten}}
\def\map{\textit{map}}
\def\blexers{\mathit{blexer\_simp}}
\def\simp{\mathit{simp}}
\def\mkeps{\mathit{mkeps}}
\def\bmkeps{\textit{bmkeps}}
\def\inj{\mathit{inj}}
\def\Empty{\mathit{Empty}}
\def\Left{\mathit{Left}}
\def\Right{\mathit{Right}}
\def\Stars{\mathit{Stars}}
\def\Char{\mathit{Char}}
\def\Seq{\mathit{Seq}}
\def\Der{\mathit{Der}}
\def\nullable{\mathit{nullable}}
\def\Z{\mathit{Z}}
\def\S{\mathit{S}}
\def\flex{\textit{flex}}
\def\rup{r^\uparrow}
\def\retrieve{\textit{retrieve}}
\def\AALTS{\textit{AALTS}}
\def\AONE{\textit{AONE}}
%\theoremstyle{theorem}
%\newtheorem{theorem}{Theorem}
%\theoremstyle{lemma}
%\newtheorem{lemma}{Lemma}
%\newcommand{\lemmaautorefname}{Lemma}
%\theoremstyle{definition}
%\newtheorem{definition}{Definition}
\algnewcommand\algorithmicswitch{\textbf{switch}}
\algnewcommand\algorithmiccase{\textbf{case}}
\algnewcommand\algorithmicassert{\texttt{assert}}
\algnewcommand\Assert[1]{\State \algorithmicassert(#1)}%
% New "environments"
\algdef{SE}[SWITCH]{Switch}{EndSwitch}[1]{\algorithmicswitch\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicswitch}%
\algdef{SE}[CASE]{Case}{EndCase}[1]{\algorithmiccase\ #1}{\algorithmicend\ \algorithmiccase}%
\algtext*{EndSwitch}%
\algtext*{EndCase}%


\begin{document}

\maketitle
Suppose (basic) regular expressions are given by the following grammar:

\[			r ::=   \ZERO \mid  \ONE
			 \mid  c  
			 \mid  r_1 \cdot r_2
			 \mid  r_1 + r_2   
			 \mid r^*         
\]


If we let the alphabet
where $c$ is selected from
be $\sum = \{0,1\}$,
then we can define the regular expression 
style bitcodes:


\[			 rbs ::=   \ZERO \mid  \ONE
			 \mid  1
			 \mid  0  
			 \mid  rbs_1 \cdot rbs_2
			 \mid  \sum{rbs_{list}}
			 \mid rbs^*         
\]

This is our list definition of bitcodes:
\begin{center}
$b ::=   1 \mid  0 \qquad
bs ::= [] \mid b::bs    $
\end{center}

Define an isomorphism between bitcodes based on lists
and bitcodes based on regular expressions:
\begin{center}
\begin{tabular}{lcl}
$\sigma: bit \; list$ & $\Rightarrow$ & $bit \; regex$\\
$\delta: bit$ & $\Rightarrow$ & $bit \; regex$\\
$\sigma([])$ & $=$ & $ \ONE$\\
$\sigma(b::bs)$ & $=$ & $\delta(b) \cdot \sigma(bs)$\\
$\delta(b) $ & $=$ & $b\; as \;a \;regex$
\end{tabular}
\end{center}

\emph{Annotated regular expressions} can be defined by the following
grammar using the  list definition of $bs$ :

\begin{center}
\begin{tabular}{lcl}
  $\textit{a}$ & $::=$  & $\ZERO$\\
                  & $\mid$ & $_{bs}\ONE$\\
                  & $\mid$ & $_{bs}{\bf c}$\\
                  & $\mid$ & $_{bs}\sum\,as$\\
                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
                  & $\mid$ & $_{bs}a^*$
\end{tabular}    
\end{center}  
%Let the set of all bitcoded regular expressions be $\textit{BS}$.
%Let the set of all annotated regular expression be $\textit{AR}$.
Let us play with the function $f: \textit{annotated}\; \textit{regex} \Rightarrow \textit{bit}\; \textit{regex}$ on annotated regular expressions:
\begin{center}
\begin{tabular}{lcl}
$f(\ZERO)$ & $=$ & $\ZERO$\\
$f(_{bs}\ONE)$ & $=$ & $\sigma(\textit{bs})$\\
$f(_{bs}a)$ & $=$ & $\sigma(\textit{bs}) $\\
$f(_{bs}r_1\cdot r_2)$ & $=$ & $\sigma(\textit{bs}) \cdot( f(r_1) \cdot f(r_2))$\\
$f(_{bs}\sum{rs})$ & $=$ & $\sigma(\textit{bs}) \cdot \sum\limits_{r \in rs}{f(\textit{r})}$\\
$f(_{bs}r^*)$ & $=$ & $\sigma(\textit{bs}) \cdot((0 \cdot f(r))^*\cdot 1) $
\end{tabular}
\end{center}

Now define function $L: bit\;regex \Rightarrow set \; bs$ as follows:
\begin{center}
\begin{tabular}{lcl}
$L(\ZERO)$ & $=$ & $\emptyset$\\
$L(\ONE) $ & $= $ & $\{[]\}$\\
$L(b\;as \; a\;regex) $ & $=$ & $\{b\; as \; a \; bit\}$\\
$L(bs_1 \cdot bs_2)$ & $=$ & $L(bs_1) \times L(bs_2)$\\
$L(\sum bss) $ & $=$ &$\bigcup\limits_{bs \in bss}{L(bs)}$\\
$L(bs^*)$ & $= $ & $\bigcup\limits_{0 \leq n}{(L(bs))^n}$
\end{tabular}
\end{center}

We claim that:
\begin{center}
$L(f(a) )= \{bs \mid a \gg bs\}$.
\end{center}

where contains is defined this way:
\begin{center}
\begin{tabular}{llclll}
& & & $_{bs}\ONE$ & $\gg$ & $\textit{bs}$\\
& & & $_{bs}{\bf c}$ & $\gg$ & $\textit{bs}$\\
$\textit{if} \; r_1 \gg \textit{bs}_1$ & $r_2 \; \gg \textit{bs}_2$
& $\textit{then}$  &
 $_{bs}{r_1 \cdot r_2}$ &
 $\gg$ &
 $\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\

 $\textit{if} \; r \gg \textit{bs}_1$ &  & $\textit{then}$  &
 $_{bs}{\sum(r :: \textit{rs}})$ &
 $\gg$ &
 $\textit{bs} @ \textit{bs}_1 $\\

 $\textit{if} \; _{bs}(\sum \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ &  & $\textit{then}$  &
 $_{bs}{\sum(r :: \textit{rs}})$ &
 $\gg$ &
 $\textit{bs} @ \textit{bs}_1 $\\

 $\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ &  $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$  &
 $_{bs}r^* $ &
 $\gg$ &
 $\textit{bs} @ [0] @ \textit{bs}_1@ \textit{bs}_2 $\\

 & & & $_{bs}r^*$ & $\gg$ & $\textit{bs} @ [1]$\\
\end{tabular}
\end{center}

Moreover, we claim the fact that
\begin{center}
	$L(f(a^*)) != L(f(a^* \backslash a))$
\end{center}
This is by $\exists bs \in f(a^*), s.t. bs \notin f(a^* \backslash a)$.
Proof for the fact
$L(f(a) )= \{bs \mid a \gg bs\}$:

\bibliographystyle{plain}
\bibliography{root}


\end{document}