ninems/ninems.tex
changeset 36 97c9ac95194d
parent 35 f70e9ab4e680
child 37 17d8e7599a01
equal deleted inserted replaced
35:f70e9ab4e680 36:97c9ac95194d
   239 regular expressions \cite{Brzozowski1964}. We shall briefly explain
   239 regular expressions \cite{Brzozowski1964}. We shall briefly explain
   240 the algorithms next.
   240 the algorithms next.
   241 
   241 
   242 \section{The Algorithms by  Brzozowski, and Sulzmann and Lu}
   242 \section{The Algorithms by  Brzozowski, and Sulzmann and Lu}
   243 
   243 
   244 Suppose regular expressions are given by the following grammar:
   244 Suppose regular expressions are given by the following grammar:\\
   245 
   245 			$r$  $::=$   $\ZERO$ $\mid$  $\ONE$   
   246 
   246 			 $\mid$  $c$          
   247 \begin{center}
   247 			 $\mid$  $r_1 \cdot r_2$
   248 %TODO
   248 			 $\mid$  $r_1 + r_2$   
   249 		\begin{tabular}{@{}rrl@{}}
   249 			 $\mid$  $r^*$         
   250 			\multicolumn{3}{@{}l}{\textbf{Regular Expressions}}\medskip\\
   250 
   251 			$r$ & $::=$  & $\ZERO$\\
   251 
   252 			& $\mid$ & $\ONE$   \\
       
   253 			& $\mid$ & $c$          \\
       
   254 			& $\mid$ & $r_1 \cdot r_2$\\
       
   255 			& $\mid$ & $r_1 + r_2$   \\
       
   256 			
       
   257 			& $\mid$ & $r^*$         
       
   258 		\end{tabular}
       
   259 
       
   260 \end{center}
       
   261 
   252 
   262 \noindent
   253 \noindent
   263 The intended meaning of the regular expressions is as usual: $\ZERO$
   254 The intended meaning of the regular expressions is as usual: $\ZERO$
   264 cannot match any string, $\ONE$ can match the empty string, the
   255 cannot match any string, $\ONE$ can match the empty string, the
   265 character regular expression $c$ can match the character $c$, and so
   256 character regular expression $c$ can match the character $c$, and so
   266 on. The brilliant contribution by Brzozowski is the notion of
   257 on. The brilliant contribution by Brzozowski is the notion of
   267 \emph{derivatives} of regular expressions.  The idea behind this
   258 \emph{derivatives} of regular expressions.  The idea behind this
   268 notion is as follows: suppose a regular expression $r$ can match a
   259 notion is as follows: suppose a regular expression $r$ can match a
   269 string of the form $c\!::\! s$ (that is a list of characters starting
   260 string of the form $c\!::\! s$ (that is a list of characters starting
   270 with $c$), what does the regular expression look like that can match
   261 with $c$), what does the regular expression look like that can match
   271 just $s$? Brzozowski gave a neat answer to this question. He defined
   262 just $s$? Brzozowski gave a neat answer to this question. He started with the definition of $nullable$:
   272 the following operation on regular expressions, written
       
   273 $r\backslash c$ (the derivative of $r$ w.r.t.~the character $c$):
       
   274 
       
   275 \begin{center}
       
   276 \begin{tabular}{lcl}
       
   277 		$\ZERO \backslash c$ & $\dn$ & $\ZERO$\\  
       
   278 		$\ONE \backslash c$  & $\dn$ & $\ZERO$\\
       
   279 		$d \backslash c$     & $\dn$ & 
       
   280 		$\mathit{if} \;c = d\;\mathit{then}\;\ONE\;\mathit{else}\;\ZERO$\\
       
   281 $(r_1 + r_2)\backslash c$     & $\dn$ & $r_1 \backslash c \,+\, r_2 \backslash c$\\
       
   282 $(r_1 \cdot r_2)\backslash c$ & $\dn$ & $\mathit{if} \, \epsilon \in L(r_1)$\\
       
   283 	&   & $\mathit{then}\;(r_1\backslash c) \cdot r_2 \,+\, r_2\backslash c$\\
       
   284 	&   & $\mathit{else}\;(r_1\backslash c) \cdot r_2$\\
       
   285 	$(r^*)\backslash c$           & $\dn$ & $(r\backslash c) \cdot r^*$\\
       
   286 \end{tabular}
       
   287 \end{center}
       
   288 
       
   289 \noindent
       
   290 The $\mathit{if}$ condition in the definition of $(r_1 \cdot r_2) \backslash c$ involves a membership testing: $\epsilon \overset{?}{\in} L(r_1)$.
       
   291 Such testing is easily implemented by the following simple recursive function $\nullable(\_)$:
       
   292 
       
   293 
       
   294 \begin{center}
   263 \begin{center}
   295 		\begin{tabular}{lcl}
   264 		\begin{tabular}{lcl}
   296 			$\nullable(\ZERO)$     & $\dn$ & $\mathit{false}$ \\  
   265 			$\nullable(\ZERO)$     & $\dn$ & $\mathit{false}$ \\  
   297 			$\nullable(\ONE)$      & $\dn$ & $\mathit{true}$ \\
   266 			$\nullable(\ONE)$      & $\dn$ & $\mathit{true}$ \\
   298 			$\nullable(c)$ 	       & $\dn$ & $\mathit{false}$ \\
   267 			$\nullable(c)$ 	       & $\dn$ & $\mathit{false}$ \\
   299 			$\nullable(r_1 + r_2)$ & $\dn$ & $\nullable(r_1) \vee \nullable(r_2)$ \\
   268 			$\nullable(r_1 + r_2)$ & $\dn$ & $\nullable(r_1) \vee \nullable(r_2)$ \\
   300 			$\nullable(r_1\cdot r_2)$  & $\dn$ & $\nullable(r_1) \wedge \nullable(r_2)$ \\
   269 			$\nullable(r_1\cdot r_2)$  & $\dn$ & $\nullable(r_1) \wedge \nullable(r_2)$ \\
   301 			$\nullable(r^*)$       & $\dn$ & $\mathit{true}$ \\
   270 			$\nullable(r^*)$       & $\dn$ & $\mathit{true}$ \\
   302 		\end{tabular}
   271 		\end{tabular}
   303 	\end{center}
   272 	\end{center}
       
   273 The function tests whether the empty string is in $L(r)$.
       
   274 He then defined
       
   275 the following operation on regular expressions, written
       
   276 $r\backslash c$ (the derivative of $r$ w.r.t.~the character $c$):
       
   277 
       
   278 \begin{center}
       
   279 \begin{tabular}{lcl}
       
   280 		$\ZERO \backslash c$ & $\dn$ & $\ZERO$\\  
       
   281 		$\ONE \backslash c$  & $\dn$ & $\ZERO$\\
       
   282 		$d \backslash c$     & $\dn$ & 
       
   283 		$\mathit{if} \;c = d\;\mathit{then}\;\ONE\;\mathit{else}\;\ZERO$\\
       
   284 $(r_1 + r_2)\backslash c$     & $\dn$ & $r_1 \backslash c \,+\, r_2 \backslash c$\\
       
   285 $(r_1 \cdot r_2)\backslash c$ & $\dn$ & $\mathit{if} \, nullable(r_1)$\\
       
   286 	&   & $\mathit{then}\;(r_1\backslash c) \cdot r_2 \,+\, r_2\backslash c$\\
       
   287 	&   & $\mathit{else}\;(r_1\backslash c) \cdot r_2$\\
       
   288 	$(r^*)\backslash c$           & $\dn$ & $(r\backslash c) \cdot r^*$\\
       
   289 \end{tabular}
       
   290 \end{center}
       
   291 
       
   292 \noindent
       
   293 
       
   294 
   304 
   295 
   305  %Assuming the classic notion of a
   296  %Assuming the classic notion of a
   306 %\emph{language} of a regular expression, written $L(\_)$, t
   297 %\emph{language} of a regular expression, written $L(\_)$, t
   307 The main
   298 The main
   308 property of the derivative operation is that
   299 property of the derivative operation is that
   317 matches with a regular expression $r$, build the derivatives of $r$
   308 matches with a regular expression $r$, build the derivatives of $r$
   318 w.r.t.\ (in succession) all the characters of the string $s$. Finally,
   309 w.r.t.\ (in succession) all the characters of the string $s$. Finally,
   319 test whether the resulting regular expression can match the empty
   310 test whether the resulting regular expression can match the empty
   320 string.  If yes, then $r$ matches $s$, and no in the negative
   311 string.  If yes, then $r$ matches $s$, and no in the negative
   321 case.\\
   312 case.\\
   322 If we define the successive derivative operation to be like this:
   313 We can generalise the derivative operation for strings like this:
   323 \begin{center}
   314 \begin{center}
   324 \begin{tabular}{lcl}
   315 \begin{tabular}{lcl}
   325 $r \backslash (c\!::\!s) $ & $\dn$ & $(r \backslash c) \backslash s$ \\
   316 $r \backslash (c\!::\!s) $ & $\dn$ & $(r \backslash c) \backslash s$ \\
   326 $r \backslash \epsilon $ & $\dn$ & $r$
   317 $r \backslash \epsilon $ & $\dn$ & $r$
   327 \end{tabular}
   318 \end{tabular}
   328 \end{center}
   319 \end{center}
   329 
       
   330  For us the main advantage is that derivatives can be
   320  For us the main advantage is that derivatives can be
   331 straightforwardly implemented in any functional programming language,
   321 straightforwardly implemented in any functional programming language,
   332 and are easily definable and reasoned about in theorem provers---the
   322 and are easily definable and reasoned about in theorem provers---the
   333 definitions just consist of inductive datatypes and simple recursive
   323 definitions just consist of inductive datatypes and simple recursive
   334 functions. Moreover, the notion of derivatives can be easily
   324 functions. Moreover, the notion of derivatives can be easily
   380 	\end{tabular}
   370 	\end{tabular}
   381 \end{center}
   371 \end{center}
   382 
   372 
   383 \noindent
   373 \noindent
   384  Here we put the regular expression and values of the same shape on the same level to illustrate the corresponding relation between them. 
   374  Here we put the regular expression and values of the same shape on the same level to illustrate the corresponding relation between them. 
   385  
   375  The flatten notation $| v |$ means extracting the characters in the value $v$ to form a string. For example, $|\mathit{Seq} \, \mathit{Char(c)} \, \mathit{Char(d)}|$ = $cd$. We omit this straightforward definition.
   386  Values are a way of expressing parse trees(the tree structure that tells how a sub-regex matches a substring). For example, $\Seq\,v_1\, v_2$ tells us how the string $|v_1| \cdot |v_2|$ matches the regex $r_1 \cdot r_2$: $r_1$ matches $|v_1|$ and $r_2$ matches $|v_2|$. Exactly how these two are matched are contained in the sub-structure of $v_1$ and $v_2$. The flatten notation $| v |$ means extracting the characters in the value $v$ to form a string. For example, $|\mathit{Seq} \, \mathit{Char(c)} \, \mathit{Char(d)}|$ = $cd$.
   376  Values are a way of expressing parse trees(the tree structure that tells how a sub-regex matches a substring). For example, $\Seq\,v_1\, v_2$ tells us how the string $|v_1| \cdot |v_2|$ matches the regex $r_1 \cdot r_2$: $r_1$ matches $|v_1|$ and $r_2$ matches $|v_2|$. Exactly how these two are matched are contained in the sub-structure of $v_1$ and $v_2$. 
   387 
   377 
   388  To give a concrete example of how value works, consider the string $xy$ and the
   378  To give a concrete example of how value works, consider the string $xy$ and the
   389 regular expression $(x + (y + xy))^*$. We can view this regular
   379 regular expression $(x + (y + xy))^*$. We can view this regular
   390 expression as a tree and if the string $xy$ is matched by two Star
   380 expression as a tree and if the string $xy$ is matched by two Star
   391 ``iterations'', then the $x$ is matched by the left-most alternative
   381 ``iterations'', then the $x$ is matched by the left-most alternative
   415 The contribution of Sulzmann and Lu is an extension of Brzozowski's
   405 The contribution of Sulzmann and Lu is an extension of Brzozowski's
   416 algorithm by a second phase (the first phase being building successive
   406 algorithm by a second phase (the first phase being building successive
   417 derivatives). In this second phase, for every successful match the
   407 derivatives). In this second phase, for every successful match the
   418 corresponding POSIX value is computed. The whole process looks like the following diagram(the working flow of the simple matching algorithm that just gives a $YES/NO$ answer is provided on the right for the purpose of comparison):\\
   408 corresponding POSIX value is computed. The whole process looks like the following diagram(the working flow of the simple matching algorithm that just gives a $YES/NO$ answer is provided on the right for the purpose of comparison):\\
   419 \begin{tikzcd}
   409 \begin{tikzcd}
   420 r_0 \arrow[r, "c_0"]  \arrow[d] & r_1 \arrow[r, "c_1"] \arrow[d] & r_2 \arrow[r, dashed] \arrow[d] & r_n \arrow[d, "mkeps" description] \\
   410 r_0 \arrow[r, "\backslash c_0"]  \arrow[d] & r_1 \arrow[r, "\backslash c_1"] \arrow[d] & r_2 \arrow[r, dashed] \arrow[d] & r_n \arrow[d, "mkeps" description] \\
   421 v_0           & v_1 \arrow[l,"inj_{r_0} c_0"]                & v_2 \arrow[l, "inj_{r_1} c_1"]              & v_n \arrow[l, dashed]         
   411 v_0           & v_1 \arrow[l,"inj_{r_0} c_0"]                & v_2 \arrow[l, "inj_{r_1} c_1"]              & v_n \arrow[l, dashed]         
       
   412 \end{tikzcd}
       
   413 \begin{tikzcd}
       
   414 r_0 \arrow[r, "\backslash c_0"]  & r_1 \arrow[r, "\backslash c_1"] & r_2 \arrow[r, dashed]  & r_n   
   422 \end{tikzcd}
   415 \end{tikzcd}
   423 
   416 
   424 We shall briefly explain this interesting process.\\ For the convenience of explanation, we have the following notations: the regular expression $r$ used for matching is also called $r_0$ and the string $s$ is composed of $n$ characters $c_0 c_1 ... c_{n-1}$.
   417 We shall briefly explain this interesting process.\\ For the convenience of explanation, we have the following notations: the regular expression $r$ used for matching is also called $r_0$ and the string $s$ is composed of $n$ characters $c_0 c_1 ... c_{n-1}$.
   425 First, we do the derivative operation on $r_0$, $r_1$, ..., using characters $c_0$, $c_1$, ...  until we get the final derivative $r_n$.We test whether it is $nullable$ or not. If no we know immediately the string does not match the regex. If yes, we start building the parse tree incrementally. We first call $mkeps$(which stands for make epsilon--make the parse tree for the empty word epsilon) to construct the parse tree $v_n$ for how the final derivative $r_n$ matches the empty string:
   418 First, we do the derivative operation on $r_0$, $r_1$, ..., using characters $c_0$, $c_1$, ...  until we get the final derivative $r_n$.We test whether it is $nullable$ or not. If no we know immediately the string does not match the regex. If yes, we start building the parse tree incrementally. We first call $mkeps$(which stands for make epsilon--make the parse tree for the empty word epsilon) to construct the parse tree $v_n$ for how the final derivative $r_n$ matches the empty string:
   426 
   419 
   505 \end{tabular}    
   498 \end{tabular}    
   506 \end{center} 
   499 \end{center} 
   507 where $\Z$ and $\S$ are arbitrary names for the bits in the
   500 where $\Z$ and $\S$ are arbitrary names for the bits in the
   508 bitsequences. 
   501 bitsequences. 
   509 Here code encodes a value into a bitsequence by converting Left into $\Z$, Right into $\S$, the start point of a non-empty star iteration into $\S$, and the border where a local star terminates into $\Z$. This conversion is apparently lossy, as it throws away the character information, and does not decode the boundary between the two operands of the sequence constructor. Moreover, with only the bitcode we cannot even tell whether the $\S$s and $\Z$s are for $Left/Right$ or $Stars$. The reason for choosing this compact way of storing information is that the relatively small size of bits can be easily moved around during the lexing process. In order to recover the bitcode back into values, we will need the regular expression as the extra information and decode them back into value:\\
   502 Here code encodes a value into a bitsequence by converting Left into $\Z$, Right into $\S$, the start point of a non-empty star iteration into $\S$, and the border where a local star terminates into $\Z$. This conversion is apparently lossy, as it throws away the character information, and does not decode the boundary between the two operands of the sequence constructor. Moreover, with only the bitcode we cannot even tell whether the $\S$s and $\Z$s are for $Left/Right$ or $Stars$. The reason for choosing this compact way of storing information is that the relatively small size of bits can be easily moved around during the lexing process. In order to recover the bitcode back into values, we will need the regular expression as the extra information and decode them back into value:\\
   510 TODO: definition of decode
   503 \begin{definition}[Bitdecoding of Values]\mbox{}
   511 \\
   504 \begin{center}
       
   505 \begin{tabular}{@{}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
       
   506   $\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
       
   507   $\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
       
   508   $\textit{decode}'\,(\Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
       
   509      $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
       
   510        (\Left\,v, bs_1)$\\
       
   511   $\textit{decode}'\,(\S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
       
   512      $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
       
   513        (\Right\,v, bs_1)$\\                           
       
   514   $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
       
   515         $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
       
   516   & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$\\
       
   517   & &   \hspace{35mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
       
   518   $\textit{decode}'\,(\Z\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
       
   519   $\textit{decode}'\,(\S\!::\!bs)\,(r^*)$ & $\dn$ & 
       
   520          $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
       
   521   & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$\\
       
   522   & &   \hspace{35mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
       
   523   
       
   524   $\textit{decode}\,bs\,r$ & $\dn$ &
       
   525      $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
       
   526   & & $\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
       
   527        \textit{else}\;\textit{None}$                       
       
   528 \end{tabular}    
       
   529 \end{center}    
       
   530 \end{definition}
   512 
   531 
   513 To do lexing using annotated regular expressions, we shall first transform the
   532 To do lexing using annotated regular expressions, we shall first transform the
   514 usual (un-annotated) regular expressions into annotated regular
   533 usual (un-annotated) regular expressions into annotated regular
   515 expressions:\\
   534 expressions:\\
   516 TODO: definition of internalise
   535 \begin{definition}
   517 \\
   536 \begin{center}
       
   537 \begin{tabular}{lcl}
       
   538   $(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\
       
   539   $(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$\\
       
   540   $(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\
       
   541   $(r_1 + r_2)^\uparrow$ & $\dn$ &
       
   542          $\textit{ALT}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\,
       
   543                             (\textit{fuse}\,[\S]\,r_2^\uparrow)$\\
       
   544   $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
       
   545          $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$\\
       
   546   $(r^*)^\uparrow$ & $\dn$ &
       
   547          $\textit{STAR}\;[]\,r^\uparrow$\\
       
   548 \end{tabular}    
       
   549 \end{center}    
       
   550 \end{definition}
   518 Then we do successive derivative operations on the annotated regular expression. This derivative operation is the same as what we previously have for the simple regular expressions, except that we take special care of the bits to store the parse tree information:\\
   551 Then we do successive derivative operations on the annotated regular expression. This derivative operation is the same as what we previously have for the simple regular expressions, except that we take special care of the bits to store the parse tree information:\\
   519 TODO: bder
   552 \begin{definition}{bder}
   520 \\
   553 \begin{center}
       
   554   \begin{tabular}{@{}lcl@{}}
       
   555   $(\textit{ZERO})\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
       
   556   $(\textit{ONE}\;bs)\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
       
   557   $(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ &
       
   558         $\textit{if}\;c=d\; \;\textit{then}\;
       
   559          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
       
   560   $(\textit{ALT}\;bs\,a_1\,a_2)\backslash c$ & $\dn$ &
       
   561         $\textit{ALT}\,bs\,(a_1\backslash c)\,(a_2\backslash c)$\\
       
   562   $(\textit{SEQ}\;bs\,a_1\,a_2)\backslash c$ & $\dn$ &
       
   563      $\textit{if}\;\textit{bnullable}\,a_1$\\
       
   564   & &$\textit{then}\;\textit{ALT}\,bs\,(\textit{SEQ}\,[]\,(a_1\backslash c)\,a_2)$\\
       
   565   & &$\phantom{\textit{then}\;\textit{ALT}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\backslash c))$\\
       
   566   & &$\textit{else}\;\textit{SEQ}\,bs\,(a_1\backslash c)\,a_2$\\
       
   567   $(\textit{STAR}\,bs\,a)\backslash c$ & $\dn$ &
       
   568       $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\backslash c))\,
       
   569        (\textit{STAR}\,[]\,r)$
       
   570 \end{tabular}    
       
   571 \end{center}    
       
   572 \end{definition}
   521 This way, we do not have to use an injection function and a second phase, but instead only need to collect the bits while running $mkeps$:
   573 This way, we do not have to use an injection function and a second phase, but instead only need to collect the bits while running $mkeps$:
   522 TODO: mkepsBC
   574 \begin{definition}[\textit{bmkeps}]\mbox{}
   523 \\
   575 \begin{center}
       
   576 \begin{tabular}{lcl}
       
   577   $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\
       
   578   $\textit{bmkeps}\,(\textit{ALT}\,bs\,a_1\,a_2)$ & $\dn$ &
       
   579      $\textit{if}\;\textit{bnullable}\,a_1$\\
       
   580   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a_1$\\
       
   581   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,a_2$\\
       
   582   $\textit{bmkeps}\,(\textit{SEQ}\,bs\,a_1\,a_2)$ & $\dn$ &
       
   583      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
       
   584   $\textit{bmkeps}\,(\textit{STAR}\,bs\,a)$ & $\dn$ &
       
   585      $bs \,@\, [\S]$
       
   586 \end{tabular}    
       
   587 \end{center}    
       
   588 \end{definition}
   524 and then decode the bits using the regular expression. The whole process looks like this:\\
   589 and then decode the bits using the regular expression. The whole process looks like this:\\
   525 r
   590 r
   526 \\
   591 \\
   527 
   592 
   528 The main point of the bitsequences and annotated regular expressions
   593 The main point of the bitsequences and annotated regular expressions
   572 	\EndCase
   637 	\EndCase
   573 \EndSwitch
   638 \EndSwitch
   574 \EndProcedure
   639 \EndProcedure
   575 \end{algorithmic}
   640 \end{algorithmic}
   576 \end{algorithm}
   641 \end{algorithm}
   577 
   642 With this simplification our previous $(a + aa)^*$ example's 8000 nodes will be reduced to only 6.
   578 
   643 
   579 Another modification is that we use simplification rules
   644 Another modification is that we use simplification rules
   580 inspired by Antimirov's work on partial derivatives. They maintain the
   645 inspired by Antimirov's work on partial derivatives. They maintain the
   581 idea that only the first ``copy'' of a regular expression in an
   646 idea that only the first ``copy'' of a regular expression in an
   582 alternative contributes to the calculation of a POSIX value. All
   647 alternative contributes to the calculation of a POSIX value. All