twtsevms/twtsevms.tex
author Chengsong
Wed, 04 Mar 2020 13:25:52 +0000
changeset 147 dfcf3fa58d7f
parent 146 676440e0a233
child 148 c8ef391dd6f7
permissions -rw-r--r--
nteresting
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     1
\documentclass[a4paper,UKenglish]{lipics}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     2
\usepackage{graphic}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     3
\usepackage{data}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     4
\usepackage{tikz-cd}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     5
\usepackage{tikz}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     6
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     7
%\usetikzlibrary{graphs}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     8
%\usetikzlibrary{graphdrawing}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
     9
%\usegdlibrary{trees}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    10
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    11
%\usepackage{algorithm}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    12
\usepackage{amsmath}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    13
\usepackage{xcolor}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    14
\usepackage[noend]{algpseudocode}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    15
\usepackage{enumitem}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    16
\usepackage{nccmath}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    17
\usepackage{soul}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    18
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    19
\definecolor{darkblue}{rgb}{0,0,0.6}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    20
\hypersetup{colorlinks=true,allcolors=darkblue}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    21
\newcommand{\comment}[1]%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    22
{{\color{red}$\Rightarrow$}\marginpar{\raggedright\small{\bf\color{red}#1}}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    23
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    24
% \documentclass{article}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    25
%\usepackage[utf8]{inputenc}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    26
%\usepackage[english]{babel}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    27
%\usepackage{listings}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    28
% \usepackage{amsthm}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    29
%\usepackage{hyperref}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    30
% \usepackage[margin=0.5in]{geometry}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    31
%\usepackage{pmboxdraw}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    32
 
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    33
\title{POSIX Regular Expression Matching and Lexing}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    34
\author{Chengsong Tan}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    35
\affil{King's College London\\
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    36
London, UK\\
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    37
\texttt{chengsong.tan@kcl.ac.uk}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    38
\authorrunning{Chengsong Tan}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    39
\Copyright{Chengsong Tan}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    40
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    41
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    42
\newcommand{\ZERO}{\mbox{\bf 0}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    43
\newcommand{\ONE}{\mbox{\bf 1}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    44
\def\erase{\textit{erase}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    45
\def\bders{\textit{bders}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    46
\def\lexer{\mathit{lexer}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    47
\def\blexer{\textit{blexer}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    48
\def\fuse{\textit{fuse}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    49
\def\flatten{\textit{flatten}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    50
\def\map{\textit{map}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    51
\def\blexers{\mathit{blexer\_simp}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    52
\def\simp{\mathit{simp}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    53
\def\mkeps{\mathit{mkeps}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    54
\def\bmkeps{\textit{bmkeps}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    55
\def\inj{\mathit{inj}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    56
\def\Empty{\mathit{Empty}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    57
\def\Left{\mathit{Left}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    58
\def\Right{\mathit{Right}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    59
\def\Stars{\mathit{Stars}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    60
\def\Char{\mathit{Char}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    61
\def\Seq{\mathit{Seq}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    62
\def\Der{\mathit{Der}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    63
\def\nullable{\mathit{nullable}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    64
\def\Z{\mathit{Z}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    65
\def\S{\mathit{S}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    66
\def\flex{\textit{flex}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    67
\def\rup{r^\uparrow}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    68
\def\retrieve{\textit{retrieve}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    69
\def\AALTS{\textit{AALTS}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    70
\def\AONE{\textit{AONE}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    71
%\theoremstyle{theorem}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    72
%\newtheorem{theorem}{Theorem}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    73
%\theoremstyle{lemma}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    74
%\newtheorem{lemma}{Lemma}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    75
%\newcommand{\lemmaautorefname}{Lemma}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    76
%\theoremstyle{definition}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    77
%\newtheorem{definition}{Definition}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    78
\algnewcommand\algorithmicswitch{\textbf{switch}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    79
\algnewcommand\algorithmiccase{\textbf{case}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    80
\algnewcommand\algorithmicassert{\texttt{assert}}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    81
\algnewcommand\Assert[1]{\State \algorithmicassert(#1)}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    82
% New "environments"
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    83
\algdef{SE}[SWITCH]{Switch}{EndSwitch}[1]{\algorithmicswitch\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicswitch}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    84
\algdef{SE}[CASE]{Case}{EndCase}[1]{\algorithmiccase\ #1}{\algorithmicend\ \algorithmiccase}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    85
\algtext*{EndSwitch}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    86
\algtext*{EndCase}%
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    87
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    88
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    89
\begin{document}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    90
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    91
\maketitle
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    92
Suppose (basic) regular expressions are given by the following grammar:
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
    93
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    94
\[			r ::=   \ZERO \mid  \ONE
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    95
			 \mid  c  
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    96
			 \mid  r_1 \cdot r_2
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    97
			 \mid  r_1 + r_2   
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    98
			 \mid r^*         
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
    99
\]
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   100
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   101
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   102
If we let the alphabet
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   103
from which $c$ is selected
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   104
be $\Sigma = \{0,1\}$,
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   105
then bitcodes can be defined in a 
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   106
regular expression style:
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   107
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   108
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   109
\[			 bs ::=   \ZERO \mid  \ONE
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   110
			 \mid  1
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   111
			 \mid  0  
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   112
			 \mid  bs_1 \cdot bs_2
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   113
			 \mid  \sum{bs_{list}}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   114
			 \mid bs^*         
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   115
\]
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   116
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   117
We can define an isomorphism between the regex
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   118
definition of bitcodes and our list definition of bitcodes:
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   119
\begin{center}
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   120
		$b ::=   1 \mid  0 \qquad
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   121
bs ::= [] \mid b::bs    
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   122
$
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   123
\end{center}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   124
For example, writing $\sigma$ for this isomorphism, we can let $\sigma([])= \ONE$.
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   125
However, the detailed definition of this isomorphism is not needed for now.
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   126
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   127
\emph{Annotated regular expressions} can be defined by the following
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   128
grammar using the new definition of $bs$:
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   129
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   130
\begin{center}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   131
\begin{tabular}{lcl}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   132
  $\textit{a}$ & $::=$  & $\ZERO$\\
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   133
                  & $\mid$ & $_{bs}\ONE$\\
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   134
                  & $\mid$ & $_{bs}{\bf c}$\\
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   135
                  & $\mid$ & $_{bs}\sum\,as$\\
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   136
                  & $\mid$ & $_{bs}a_1\cdot a_2$\\
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   137
                  & $\mid$ & $_{bs}a^*$
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   138
\end{tabular}    
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   139
\end{center}  
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   140
Let the set of all bitcoded regular expressions be $\textit{BS}$.
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   141
Let the set of all annotated regular expressions be $\textit{AR}$.
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   142
Let us play with the function $f: \textit{AR} \rightarrow \textit{BS}$ on annotated regular expressions:
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   143
\begin{center}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   144
$f(\ZERO) = \ZERO$\\
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   145
$f(_{bs}\ONE) = \textit{bs}$\\
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   146
$f(_{bs}{\bf c}) = \textit{bs} $\\
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   147
$f(_{bs}r_1\cdot r_2) = \textit{bs} \cdot( f(r_1) \cdot f(r_2))$\\
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   148
$f(_{bs}\sum{rs}) = \textit{bs} \cdot \sum\limits_{r \in rs}{f(\textit{r})}$\\
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   149
$f(_{bs}r^*) = \textit{bs} \cdot((0 \cdot f(r))^*\cdot 1) $
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   150
\end{center}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   151
147
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   152
We claim that:
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   153
\begin{center}
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   154
$f(a) = \{bs \mid a \gg bs\}$.
dfcf3fa58d7f nteresting
Chengsong
parents: 146
diff changeset
   155
\end{center}
146
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   156
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   157
\bibliographystyle{plain}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   158
\bibliography{root}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   159
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   160
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   161
\end{document}
676440e0a233 daily little report
Chengsong
parents:
diff changeset
   162