146
|
1 |
\documentclass[a4paper,UKenglish]{lipics}
|
148
|
2 |
\usepackage{data}
|
146
|
3 |
\usepackage{graphic}
|
|
4 |
\usepackage{tikz-cd}
|
|
5 |
\usepackage{tikz}
|
|
6 |
|
|
7 |
%\usetikzlibrary{graphs}
|
|
8 |
%\usetikzlibrary{graphdrawing}
|
|
9 |
%\usegdlibrary{trees}
|
|
10 |
%\usepackage{algorithm}
|
|
11 |
\usepackage{amsmath}
|
|
12 |
\usepackage{xcolor}
|
|
13 |
\usepackage[noend]{algpseudocode}
|
|
14 |
\usepackage{enumitem}
|
|
15 |
\usepackage{nccmath}
|
|
16 |
\usepackage{soul}
|
148
|
17 |
%works?
|
146
|
18 |
\definecolor{darkblue}{rgb}{0,0,0.6}
|
|
19 |
\hypersetup{colorlinks=true,allcolors=darkblue}
|
|
20 |
\newcommand{\comment}[1]%
|
|
21 |
{{\color{red}$\Rightarrow$}\marginpar{\raggedright\small{\bf\color{red}#1}}}
|
|
22 |
|
|
23 |
% \documentclass{article}
|
|
24 |
%\usepackage[utf8]{inputenc}
|
|
25 |
%\usepackage[english]{babel}
|
|
26 |
%\usepackage{listings}
|
|
27 |
% \usepackage{amsthm}
|
|
28 |
%\usepackage{hyperref}
|
|
29 |
% \usepackage[margin=0.5in]{geometry}
|
|
30 |
%\usepackage{pmboxdraw}
|
|
31 |
|
|
32 |
\title{POSIX Regular Expression Matching and Lexing}
|
|
33 |
\author{Chengsong Tan}
|
|
34 |
\affil{King's College London\\
|
|
35 |
London, UK\\
|
|
36 |
\texttt{chengsong.tan@kcl.ac.uk}}
|
|
37 |
\authorrunning{Chengsong Tan}
|
|
38 |
\Copyright{Chengsong Tan}
|
|
39 |
|
|
40 |
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
|
|
41 |
\newcommand{\ZERO}{\mbox{\bf 0}}
|
|
42 |
\newcommand{\ONE}{\mbox{\bf 1}}
|
|
43 |
\def\erase{\textit{erase}}
|
|
44 |
\def\bders{\textit{bders}}
|
|
45 |
\def\lexer{\mathit{lexer}}
|
|
46 |
\def\blexer{\textit{blexer}}
|
|
47 |
\def\fuse{\textit{fuse}}
|
|
48 |
\def\flatten{\textit{flatten}}
|
|
49 |
\def\map{\textit{map}}
|
|
50 |
\def\blexers{\mathit{blexer\_simp}}
|
|
51 |
\def\simp{\mathit{simp}}
|
|
52 |
\def\mkeps{\mathit{mkeps}}
|
|
53 |
\def\bmkeps{\textit{bmkeps}}
|
|
54 |
\def\inj{\mathit{inj}}
|
|
55 |
\def\Empty{\mathit{Empty}}
|
|
56 |
\def\Left{\mathit{Left}}
|
|
57 |
\def\Right{\mathit{Right}}
|
|
58 |
\def\Stars{\mathit{Stars}}
|
|
59 |
\def\Char{\mathit{Char}}
|
|
60 |
\def\Seq{\mathit{Seq}}
|
|
61 |
\def\Der{\mathit{Der}}
|
|
62 |
\def\nullable{\mathit{nullable}}
|
|
63 |
\def\Z{\mathit{Z}}
|
|
64 |
\def\S{\mathit{S}}
|
|
65 |
\def\flex{\textit{flex}}
|
|
66 |
\def\rup{r^\uparrow}
|
|
67 |
\def\retrieve{\textit{retrieve}}
|
|
68 |
\def\AALTS{\textit{AALTS}}
|
|
69 |
\def\AONE{\textit{AONE}}
|
|
70 |
%\theoremstyle{theorem}
|
|
71 |
%\newtheorem{theorem}{Theorem}
|
|
72 |
%\theoremstyle{lemma}
|
|
73 |
%\newtheorem{lemma}{Lemma}
|
|
74 |
%\newcommand{\lemmaautorefname}{Lemma}
|
|
75 |
%\theoremstyle{definition}
|
|
76 |
%\newtheorem{definition}{Definition}
|
|
77 |
\algnewcommand\algorithmicswitch{\textbf{switch}}
|
|
78 |
\algnewcommand\algorithmiccase{\textbf{case}}
|
|
79 |
\algnewcommand\algorithmicassert{\texttt{assert}}
|
|
80 |
\algnewcommand\Assert[1]{\State \algorithmicassert(#1)}%
|
|
81 |
% New "environments"
|
|
82 |
\algdef{SE}[SWITCH]{Switch}{EndSwitch}[1]{\algorithmicswitch\ #1\ \algorithmicdo}{\algorithmicend\ \algorithmicswitch}%
|
|
83 |
\algdef{SE}[CASE]{Case}{EndCase}[1]{\algorithmiccase\ #1}{\algorithmicend\ \algorithmiccase}%
|
|
84 |
\algtext*{EndSwitch}%
|
|
85 |
\algtext*{EndCase}%
|
|
86 |
|
|
87 |
|
|
88 |
\begin{document}
|
|
89 |
|
|
90 |
\maketitle
|
147
|
91 |
Suppose (basic) regular expressions are given by the following grammar:
|
146
|
92 |
|
147
|
93 |
\[ r ::= \ZERO \mid \ONE
|
|
94 |
\mid c
|
|
95 |
\mid r_1 \cdot r_2
|
|
96 |
\mid r_1 + r_2
|
|
97 |
\mid r^*
|
|
98 |
\]
|
|
99 |
|
|
100 |
|
|
101 |
If we let the alphabet
|
|
102 |
where $c$ is selected from
|
|
103 |
be $\sum = \{0,1\}$,
|
148
|
104 |
then we can define the regular expression
|
|
105 |
style bitcodes:
|
147
|
106 |
|
|
107 |
|
148
|
108 |
\[ rbs ::= \ZERO \mid \ONE
|
147
|
109 |
\mid 1
|
|
110 |
\mid 0
|
148
|
111 |
\mid rbs_1 \cdot rbs_2
|
|
112 |
\mid \sum{rbs_{list}}
|
|
113 |
\mid rbs^*
|
147
|
114 |
\]
|
|
115 |
|
148
|
116 |
This is our list definition of bitcodes:
|
|
117 |
\begin{center}
|
|
118 |
$b ::= 1 \mid 0 \qquad
|
|
119 |
bs ::= [] \mid b::bs $
|
|
120 |
\end{center}
|
|
121 |
|
|
122 |
Define an isomorphism between bitcodes based on lists
|
|
123 |
and bitcodes based on regular expressions:
|
146
|
124 |
\begin{center}
|
148
|
125 |
\begin{tabular}{lcl}
|
|
126 |
$\sigma: bit \; list$ & $\Rightarrow$ & $bit \; regex$\\
|
|
127 |
$\delta: bit$ & $\Rightarrow$ & $bit \; regex$\\
|
|
128 |
$\sigma([])$ & $=$ & $ \ONE$\\
|
|
129 |
$\sigma(b::bs)$ & $=$ & $\delta(b) \cdot \sigma(bs)$\\
|
|
130 |
$\delta(b) $ & $=$ & $b\; as \;a \;regex$
|
|
131 |
\end{tabular}
|
147
|
132 |
\end{center}
|
|
133 |
|
|
134 |
\emph{Annotated regular expressions} can be defined by the following
|
148
|
135 |
grammar using the list definition of $bs$ :
|
147
|
136 |
|
|
137 |
\begin{center}
|
|
138 |
\begin{tabular}{lcl}
|
|
139 |
$\textit{a}$ & $::=$ & $\ZERO$\\
|
|
140 |
& $\mid$ & $_{bs}\ONE$\\
|
|
141 |
& $\mid$ & $_{bs}{\bf c}$\\
|
|
142 |
& $\mid$ & $_{bs}\sum\,as$\\
|
|
143 |
& $\mid$ & $_{bs}a_1\cdot a_2$\\
|
|
144 |
& $\mid$ & $_{bs}a^*$
|
|
145 |
\end{tabular}
|
|
146 |
\end{center}
|
148
|
147 |
%Let the set of all bitcoded regular expressions be $\textit{BS}$.
|
|
148 |
%Let the set of all annotated regular expression be $\textit{AR}$.
|
|
149 |
Let us play with the function $f: \textit{annotated}\; \textit{regex} \Rightarrow \textit{bit}\; \textit{regex}$ on annotated regular expressions:
|
147
|
150 |
\begin{center}
|
148
|
151 |
\begin{tabular}{lcl}
|
|
152 |
$f(\ZERO)$ & $=$ & $\ZERO$\\
|
|
153 |
$f(_{bs}\ONE)$ & $=$ & $\sigma(\textit{bs})$\\
|
|
154 |
$f(_{bs}a)$ & $=$ & $\sigma(\textit{bs}) $\\
|
|
155 |
$f(_{bs}r_1\cdot r_2)$ & $=$ & $\sigma(\textit{bs}) \cdot( f(r_1) \cdot f(r_2))$\\
|
|
156 |
$f(_{bs}\sum{rs})$ & $=$ & $\sigma(\textit{bs}) \cdot \sum\limits_{r \in rs}{f(\textit{r})}$\\
|
|
157 |
$f(_{bs}r^*)$ & $=$ & $\sigma(\textit{bs}) \cdot((0 \cdot f(r))^*\cdot 1) $
|
|
158 |
\end{tabular}
|
|
159 |
\end{center}
|
|
160 |
|
|
161 |
Now define function $L: bit\;regex \Rightarrow set \; bs$ as follows:
|
|
162 |
\begin{center}
|
|
163 |
\begin{tabular}{lcl}
|
|
164 |
$L(\ZERO)$ & $=$ & $\emptyset$\\
|
|
165 |
$L(\ONE) $ & $= $ & $\{[]\}$\\
|
|
166 |
$L(b\;as \; a\;regex) $ & $=$ & $\{b\; as \; a \; bit\}$\\
|
|
167 |
$L(bs_1 \cdot bs_2)$ & $=$ & $L(bs_1) \times L(bs_2)$\\
|
|
168 |
$L(\sum bss) $ & $=$ &$\bigcup\limits_{bs \in bss}{L(bs)}$\\
|
|
169 |
$L(bs^*)$ & $= $ & $\bigcup\limits_{0 \leq n}{(L(bs))^n}$
|
|
170 |
\end{tabular}
|
146
|
171 |
\end{center}
|
|
172 |
|
147
|
173 |
We claim that:
|
|
174 |
\begin{center}
|
148
|
175 |
$L(f(a) )= \{bs \mid a \gg bs\}$.
|
147
|
176 |
\end{center}
|
146
|
177 |
|
148
|
178 |
where contains is defined this way:
|
|
179 |
\begin{center}
|
|
180 |
\begin{tabular}{llclll}
|
|
181 |
& & & $_{bs}\ONE$ & $\gg$ & $\textit{bs}$\\
|
|
182 |
& & & $_{bs}{\bf c}$ & $\gg$ & $\textit{bs}$\\
|
|
183 |
$\textit{if} \; r_1 \gg \textit{bs}_1$ & $r_2 \; \gg \textit{bs}_2$
|
|
184 |
& $\textit{then}$ &
|
|
185 |
$_{bs}{r_1 \cdot r_2}$ &
|
|
186 |
$\gg$ &
|
|
187 |
$\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\
|
|
188 |
|
|
189 |
$\textit{if} \; r \gg \textit{bs}_1$ & & $\textit{then}$ &
|
|
190 |
$_{bs}{\sum(r :: \textit{rs}})$ &
|
|
191 |
$\gg$ &
|
|
192 |
$\textit{bs} @ \textit{bs}_1 $\\
|
|
193 |
|
|
194 |
$\textit{if} \; _{bs}(\sum \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ & & $\textit{then}$ &
|
|
195 |
$_{bs}{\sum(r :: \textit{rs}})$ &
|
|
196 |
$\gg$ &
|
|
197 |
$\textit{bs} @ \textit{bs}_1 $\\
|
|
198 |
|
|
199 |
$\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ & $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$ &
|
|
200 |
$_{bs}r^* $ &
|
|
201 |
$\gg$ &
|
|
202 |
$\textit{bs} @ [0] @ \textit{bs}_1@ \textit{bs}_2 $\\
|
|
203 |
|
|
204 |
& & & $_{bs}r^*$ & $\gg$ & $\textit{bs} @ [1]$\\
|
|
205 |
\end{tabular}
|
|
206 |
\end{center}
|
|
207 |
|
|
208 |
Moreover, we claim the fact that
|
|
209 |
\begin{center}
|
|
210 |
$L(f(a^*)) != L(f(a^* \backslash a))$
|
|
211 |
\end{center}
|
|
212 |
This is by $\exists bs \in f(a^*), s.t. bs \notin f(a^* \backslash a)$.
|
|
213 |
Proof for the fact
|
|
214 |
$L(f(a) )= \{bs \mid a \gg bs\}$:
|
|
215 |
|
146
|
216 |
\bibliographystyle{plain}
|
|
217 |
\bibliography{root}
|
|
218 |
|
|
219 |
|
|
220 |
\end{document}
|
|
221 |
|