13 |
13 |
14 |
14 |
15 |
15 |
16 declare [[show_question_marks = false]] |
16 declare [[show_question_marks = false]] |
17 |
17 |
|
18 syntax (latex output) |
|
19 "_Collect" :: "pttrn => bool => 'a set" ("(1{_ \<^raw:\mbox{\boldmath$\mid$}> _})") |
|
20 "_CollectIn" :: "pttrn => 'a set => bool => 'a set" ("(1{_ \<in> _ |e _})") |
|
21 |
|
22 |
18 abbreviation |
23 abbreviation |
19 "der_syn r c \<equiv> der c r" |
24 "der_syn r c \<equiv> der c r" |
20 |
25 |
21 abbreviation |
26 abbreviation |
22 "ders_syn r s \<equiv> ders s r" |
27 "ders_syn r s \<equiv> ders s r" |
23 |
28 |
24 |
29 |
25 abbreviation |
30 abbreviation |
26 "nprec v1 v2 \<equiv> \<not>(v1 :\<sqsubset>val v2)" |
31 "nprec v1 v2 \<equiv> \<not>(v1 :\<sqsubset>val v2)" |
|
32 |
|
33 |
27 |
34 |
28 |
35 |
29 notation (latex output) |
36 notation (latex output) |
30 If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and |
37 If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and |
31 Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,73] 73) and |
38 Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,73] 73) and |
32 |
39 |
33 ZERO ("\<^bold>0" 78) and |
40 ZERO ("\<^bold>0" 78) and |
34 ONE ("\<^bold>1" 78) and |
41 ONE ("\<^bold>1" 1000) and |
35 CHAR ("_" [1000] 80) and |
42 CHAR ("_" [1000] 80) and |
36 ALT ("_ + _" [77,77] 78) and |
43 ALT ("_ + _" [77,77] 78) and |
37 SEQ ("_ \<cdot> _" [77,77] 78) and |
44 SEQ ("_ \<cdot> _" [77,77] 78) and |
38 STAR ("_\<^sup>\<star>" [1000] 78) and |
45 STAR ("_\<^sup>\<star>" [1000] 78) and |
39 |
46 |
51 Sequ ("_ @ _" [78,77] 63) and |
58 Sequ ("_ @ _" [78,77] 63) and |
52 injval ("inj _ _ _" [79,77,79] 76) and |
59 injval ("inj _ _ _" [79,77,79] 76) and |
53 mkeps ("mkeps _" [79] 76) and |
60 mkeps ("mkeps _" [79] 76) and |
54 length ("len _" [73] 73) and |
61 length ("len _" [73] 73) and |
55 intlen ("len _" [73] 73) and |
62 intlen ("len _" [73] 73) and |
|
63 set ("_" [73] 73) and |
56 |
64 |
57 Prf ("_ : _" [75,75] 75) and |
65 Prf ("_ : _" [75,75] 75) and |
|
66 CPrf ("_ \<^raw:\mbox{\textbf{\textlengthmark}}> _" [75,75] 75) and |
58 Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and |
67 Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and |
59 |
68 |
60 lexer ("lexer _ _" [78,78] 77) and |
69 lexer ("lexer _ _" [78,78] 77) and |
61 F_RIGHT ("F\<^bsub>Right\<^esub> _") and |
70 F_RIGHT ("F\<^bsub>Right\<^esub> _") and |
62 F_LEFT ("F\<^bsub>Left\<^esub> _") and |
71 F_LEFT ("F\<^bsub>Left\<^esub> _") and |
151 |
169 |
152 \item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall |
170 \item[$\bullet$] \emph{Star Rule:} A subexpression repeated by ${}^\star$ shall |
153 not match an empty string unless this is the only match for the repetition.\smallskip |
171 not match an empty string unless this is the only match for the repetition.\smallskip |
154 |
172 |
155 \item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to |
173 \item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to |
156 be longer than no match at all. |
174 be longer than no match at all.\marginpar{Explain its purpose} |
157 \end{itemize} |
175 \end{itemize} |
158 |
176 |
159 \noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords |
177 \noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords |
160 such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} |
178 such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} |
161 recognising identifiers (say, a single character followed by |
179 recognising identifiers (say, a single character followed by |
163 @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX matching to tokenise strings, |
181 @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX matching to tokenise strings, |
164 say @{text "iffoo"} and @{text "if"}. For @{text "iffoo"} we obtain |
182 say @{text "iffoo"} and @{text "if"}. For @{text "iffoo"} we obtain |
165 by the Longest Match Rule a single identifier token, not a keyword |
183 by the Longest Match Rule a single identifier token, not a keyword |
166 followed by an identifier. For @{text "if"} we obtain by the Priority |
184 followed by an identifier. For @{text "if"} we obtain by the Priority |
167 Rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} |
185 Rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} |
168 matches also. |
186 matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} matches @{text "iffoo"}, respectively @{text "if"}, in exactly one |
|
187 `iteration' of the star. |
|
188 |
169 |
189 |
170 One limitation of Brzozowski's matcher is that it only generates a |
190 One limitation of Brzozowski's matcher is that it only generates a |
171 YES/NO answer for whether a string is being matched by a regular |
191 YES/NO answer for whether a string is being matched by a regular |
172 expression. Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher |
192 expression. Sulzmann and Lu~\cite{Sulzmann2014} extended this matcher |
173 to allow generation not just of a YES/NO answer but of an actual |
193 to allow generation not just of a YES/NO answer but of an actual |
183 r}, and to show that (once a string to be matched is chosen) there is |
203 r}, and to show that (once a string to be matched is chosen) there is |
184 a maximum element and that it is computed by their derivative-based |
204 a maximum element and that it is computed by their derivative-based |
185 algorithm. This proof idea is inspired by work of Frisch and Cardelli |
205 algorithm. This proof idea is inspired by work of Frisch and Cardelli |
186 \cite{Frisch2004} on a GREEDY regular expression matching |
206 \cite{Frisch2004} on a GREEDY regular expression matching |
187 algorithm. However, we were not able to establish transitivity and |
207 algorithm. However, we were not able to establish transitivity and |
188 totality for the ``order relation'' by Sulzmann and Lu. ??In Section |
208 totality for the ``order relation'' by Sulzmann and Lu. \marginpar{We probably drop this section} |
|
209 ??In Section |
189 \ref{argu} we identify some inherent problems with their approach (of |
210 \ref{argu} we identify some inherent problems with their approach (of |
190 which some of the proofs are not published in \cite{Sulzmann2014}); |
211 which some of the proofs are not published in \cite{Sulzmann2014}); |
191 perhaps more importantly, we give a simple inductive (and |
212 perhaps more importantly, we give a simple inductive (and |
192 algorithm-independent) definition of what we call being a {\em POSIX |
213 algorithm-independent) definition of what we call being a {\em POSIX |
193 value} for a regular expression @{term r} and a string @{term s}; we |
214 value} for a regular expression @{term r} and a string @{term s}; we |
228 \cite{Sulzmann2014} is available at the website of its first author; this |
249 \cite{Sulzmann2014} is available at the website of its first author; this |
229 extended version already includes remarks in the appendix that their |
250 extended version already includes remarks in the appendix that their |
230 informal proof contains gaps, and possible fixes are not fully worked out.} |
251 informal proof contains gaps, and possible fixes are not fully worked out.} |
231 Our specification of a POSIX value consists of a simple inductive definition |
252 Our specification of a POSIX value consists of a simple inductive definition |
232 that given a string and a regular expression uniquely determines this value. |
253 that given a string and a regular expression uniquely determines this value. |
|
254 We also show that our definition is equivalent to an ordering |
|
255 of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2013}. |
233 Derivatives as calculated by Brzozowski's method are usually more complex |
256 Derivatives as calculated by Brzozowski's method are usually more complex |
234 regular expressions than the initial one; various optimisations are |
257 regular expressions than the initial one; various optimisations are |
235 possible. We prove the correctness when simplifications of @{term "ALT ZERO |
258 possible. We prove the correctness when simplifications of @{term "ALT ZERO |
236 r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to |
259 r"}, @{term "ALT r ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to |
237 @{term r} are applied. |
260 @{term r} are applied. |
265 only the empty string and @{term c} for matching a character literal. The |
288 only the empty string and @{term c} for matching a character literal. The |
266 language of a regular expression is also defined as usual by the |
289 language of a regular expression is also defined as usual by the |
267 recursive function @{term L} with the six clauses: |
290 recursive function @{term L} with the six clauses: |
268 |
291 |
269 \begin{center} |
292 \begin{center} |
270 \begin{tabular}{l@ {\hspace{3mm}}rcl} |
293 \begin{tabular}{l@ {\hspace{4mm}}rcl} |
271 (1) & @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ |
294 (1) & @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ |
272 (2) & @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ |
295 (2) & @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ |
273 (3) & @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ |
296 (3) & @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ |
274 \end{tabular} |
|
275 \hspace{14mm} |
|
276 \begin{tabular}{l@ {\hspace{3mm}}rcl} |
|
277 (4) & @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
297 (4) & @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
278 (5) & @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
298 (5) & @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
279 (6) & @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ |
299 (6) & @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ |
280 \end{tabular} |
300 \end{tabular} |
281 \end{center} |
301 \end{center} |
323 @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
343 @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
324 @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
344 @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
325 @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
345 @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
326 @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
346 @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
327 @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
347 @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
328 @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ |
348 @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}%\medskip\\ |
329 |
349 \end{tabular} |
330 %\end{tabular} |
350 \end{center} |
331 %\end{center} |
351 |
332 |
352 \begin{center} |
333 %\begin{center} |
353 \begin{tabular}{lcl} |
334 %\begin{tabular}{lcl} |
|
335 |
|
336 @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
354 @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
337 @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
355 @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
338 @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
356 @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
339 @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
357 @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
340 @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
358 @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
434 @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} \qquad |
452 @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} \qquad |
435 @{thm[mode=Rule] Prf.intros(6)[of "vs"]} |
453 @{thm[mode=Rule] Prf.intros(6)[of "vs"]} |
436 \end{tabular} |
454 \end{tabular} |
437 \end{center} |
455 \end{center} |
438 |
456 |
439 \noindent Note that no values are associated with the regular expression |
457 \noindent |
|
458 where in the clause for @{const "Stars"} we use the notation @{term "v \<in> set vs"} |
|
459 for indicating that @{text v} is a member in the list @{text vs}. |
|
460 Note that no values are associated with the regular expression |
440 @{term ZERO}, and that the only value associated with the regular |
461 @{term ZERO}, and that the only value associated with the regular |
441 expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text |
462 expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text |
442 "Void"}. It is routine to establish how values ``inhabiting'' a regular |
463 "Void"}. It is routine to establish how values ``inhabiting'' a regular |
443 expression correspond to the language of a regular expression, namely |
464 expression correspond to the language of a regular expression, namely |
444 |
465 |
445 \begin{proposition} |
466 \begin{proposition} |
446 @{thm L_flat_Prf} |
467 @{thm L_flat_Prf} |
447 \end{proposition} |
468 \end{proposition} |
448 |
469 |
449 In general there is more than one value associated with a regular |
470 \noindent |
450 expression. In case of POSIX matching the problem is to calculate the |
471 Given a regular expression @{text r} and a string @{text s}, we can define the |
451 unique value that satisfies the (informal) POSIX rules from the |
472 set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string |
452 Introduction. Graphically the POSIX value calculation algorithm by |
473 being @{text s} by |
|
474 |
|
475 \begin{center} |
|
476 @{thm LV_def} |
|
477 \end{center} |
|
478 |
|
479 \noindent However, later on it will sometimes be necessary to |
|
480 restrict the set of lexical values to a subset called |
|
481 \emph{Canonical Values}. The idea of canonical values is that they |
|
482 satisfy the Star Rule (see Introduction) where the $^\star$ does not |
|
483 match the empty string unless this is the only match for the |
|
484 repetition. One way to define canonical values formally is to use a |
|
485 stronger inhabitation relation, written @{term "\<Turnstile> DUMMY : DUMMY"}, which has the same rules as @{term |
|
486 "\<turnstile> DUMMY : DUMMY"} shown above, except that the rule for |
|
487 @{term Stars} has |
|
488 the additional side-condition of flattened values not being the |
|
489 empty string, namely |
|
490 |
|
491 \begin{center} |
|
492 @{thm [mode=Rule] CPrf.intros(6)} |
|
493 \end{center} |
|
494 |
|
495 \noindent |
|
496 With this we can define |
|
497 |
|
498 \begin{center} |
|
499 @{thm CV_def} |
|
500 \end{center} |
|
501 |
|
502 \noindent |
|
503 Clearly we have @{thm LV_CV_subset}. |
|
504 The main point of canonical values is that for every regular expression @{text r} and every |
|
505 string @{text s}, the set @{term "CV r s"} is finite. |
|
506 |
|
507 \begin{lemma} |
|
508 @{thm CV_finite} |
|
509 \end{lemma} |
|
510 |
|
511 \noindent This finiteness property does not generally hold for lexical values where |
|
512 for example @{term "LV (STAR ONE) []"} contains infinitely many |
|
513 values, but @{thm CV_STAR_ONE_empty}. However, if a regular |
|
514 expression @{text r} matches a string @{text s}, then in general the |
|
515 set @{term "CV r s"} is not just a |
|
516 singleton set. In case of POSIX matching the problem is to |
|
517 calculate the unique value that satisfies the (informal) POSIX rules |
|
518 from the Introduction. It will turn out that this POSIX value is in fact a |
|
519 canonical value. |
|
520 |
|
521 Graphically the POSIX value calculation algorithm by |
453 Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} |
522 Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} |
454 where the path from the left to the right involving @{term derivatives}/@{const |
523 where the path from the left to the right involving @{term derivatives}/@{const |
455 nullable} is the first phase of the algorithm (calculating successive |
524 nullable} is the first phase of the algorithm (calculating successive |
456 \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to |
525 \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to |
457 left, the second phase. This picture shows the steps required when a |
526 left, the second phase. This picture shows the steps required when a |
631 match the string, then @{const Some} value is returned. One important |
700 match the string, then @{const Some} value is returned. One important |
632 virtue of this algorithm is that it can be implemented with ease in any |
701 virtue of this algorithm is that it can be implemented with ease in any |
633 functional programming language and also in Isabelle/HOL. In the remaining |
702 functional programming language and also in Isabelle/HOL. In the remaining |
634 part of this section we prove that this algorithm is correct. |
703 part of this section we prove that this algorithm is correct. |
635 |
704 |
636 The well-known idea of POSIX matching is informally defined by the longest |
705 The well-known idea of POSIX matching is informally defined by some |
637 match and priority rule (see Introduction); as correctly argued in \cite{Sulzmann2014}, this |
706 rules such as the longest match and priority rule (see |
|
707 Introduction); as correctly argued in \cite{Sulzmann2014}, this |
638 needs formal specification. Sulzmann and Lu define an ``ordering |
708 needs formal specification. Sulzmann and Lu define an ``ordering |
639 relation'' between values and argue |
709 relation'' between values and argue that there is a maximum value, |
640 that there is a maximum value, as given by the derivative-based algorithm. |
710 as given by the derivative-based algorithm. In contrast, we shall |
641 In contrast, we shall introduce a simple inductive definition that |
711 introduce a simple inductive definition that specifies directly what |
642 specifies directly what a \emph{POSIX value} is, incorporating the |
712 a \emph{POSIX value} is, incorporating the POSIX-specific choices |
643 POSIX-specific choices into the side-conditions of our rules. Our |
713 into the side-conditions of our rules. Our definition is inspired by |
644 definition is inspired by the matching relation given by Vansummeren |
714 the matching relation given by Vansummeren |
645 \cite{Vansummeren2006}. The relation we define is ternary and written as |
715 \cite{Vansummeren2006}. The relation we define is ternary and |
646 \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings, regular expressions and |
716 written as \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating |
647 values. |
717 strings, regular expressions and values; the inductive rules are given in |
|
718 Figure~\ref{POSIXrules}. |
|
719 We can prove that given a string @{term s} and regular expression @{term |
|
720 r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow> v"}. |
|
721 |
648 % |
722 % |
|
723 \begin{figure}[t] |
649 \begin{center} |
724 \begin{center} |
650 \begin{tabular}{c} |
725 \begin{tabular}{c} |
651 @{thm[mode=Axiom] Posix.intros(1)}@{text "P"}@{term "ONE"} \qquad |
726 @{thm[mode=Axiom] Posix.intros(1)}@{text "P"}@{term "ONE"} \qquad |
652 @{thm[mode=Axiom] Posix.intros(2)}@{text "P"}@{term "c"}\medskip\\ |
727 @{thm[mode=Axiom] Posix.intros(2)}@{text "P"}@{term "c"}\medskip\\ |
653 @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad |
728 @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad |
666 @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ |
741 @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ |
667 @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} |
742 @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} |
668 {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$@{text "P\<star>"} |
743 {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$@{text "P\<star>"} |
669 \end{tabular} |
744 \end{tabular} |
670 \end{center} |
745 \end{center} |
671 |
746 \caption{Our inductive definition of POSIX values.}\label{POSIXrules} |
672 \noindent |
747 \end{figure} |
673 We can prove that given a string @{term s} and regular expression @{term |
748 |
674 r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow> v"}. |
749 |
675 |
750 |
676 \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm} |
751 \begin{theorem}\mbox{}\smallskip\\\label{posixdeterm} |
677 \begin{tabular}{ll} |
752 \begin{tabular}{ll} |
678 @{text "(1)"} & If @{thm (prem 1) Posix1(1)} then @{thm (concl) |
753 @{text "(1)"} & If @{thm (prem 1) Posix1(1)} then @{thm (concl) |
679 Posix1(1)} and @{thm (concl) Posix1(2)}.\\ |
754 Posix1(1)} and @{thm (concl) Posix1(2)}.\\ |
685 The second parts follows by a case analysis of @{term "s \<in> r \<rightarrow> v'"} and |
760 The second parts follows by a case analysis of @{term "s \<in> r \<rightarrow> v'"} and |
686 the first part.\qed |
761 the first part.\qed |
687 \end{proof} |
762 \end{proof} |
688 |
763 |
689 \noindent |
764 \noindent |
690 We claim that our @{term "s \<in> r \<rightarrow> v"} relation captures the idea behind the two |
765 We claim that our @{term "s \<in> r \<rightarrow> v"} relation captures the idea behind the four |
691 informal POSIX rules shown in the Introduction: Consider for example the |
766 informal POSIX rules shown in the Introduction: Consider for example the |
692 rules @{text "P+L"} and @{text "P+R"} where the POSIX value for a string |
767 rules @{text "P+L"} and @{text "P+R"} where the POSIX value for a string |
693 and an alternative regular expression, that is @{term "(s, ALT r\<^sub>1 r\<^sub>2)"}, |
768 and an alternative regular expression, that is @{term "(s, ALT r\<^sub>1 r\<^sub>2)"}, |
694 is specified---it is always a @{text "Left"}-value, \emph{except} when the |
769 is specified---it is always a @{text "Left"}-value, \emph{except} when the |
695 string to be matched is not in the language of @{term "r\<^sub>1"}; only then it |
770 string to be matched is not in the language of @{term "r\<^sub>1"}; only then it |
716 "P\<star>"}-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial |
791 "P\<star>"}-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial |
717 split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value |
792 split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value |
718 @{term v} cannot be flattened to the empty string. In effect, we require |
793 @{term v} cannot be flattened to the empty string. In effect, we require |
719 that in each ``iteration'' of the star, some non-empty substring needs to |
794 that in each ``iteration'' of the star, some non-empty substring needs to |
720 be ``chipped'' away; only in case of the empty string we accept @{term |
795 be ``chipped'' away; only in case of the empty string we accept @{term |
721 "Stars []"} as the POSIX value. |
796 "Stars []"} as the POSIX value. Indeed we can show that our POSIX value |
722 |
797 is a canonical value which excludes those @{text Stars} containing values |
|
798 that flatten to the empty string. |
|
799 |
|
800 \begin{lemma} |
|
801 @{thm [mode=IfThen] Posix_CV} |
|
802 \end{lemma} |
|
803 |
|
804 \begin{proof} |
|
805 By routine induction on @{thm (prem 1) Posix_CV}.\qed |
|
806 \end{proof} |
|
807 |
|
808 \noindent |
723 Next is the lemma that shows the function @{term "mkeps"} calculates |
809 Next is the lemma that shows the function @{term "mkeps"} calculates |
724 the POSIX value for the empty string and a nullable regular expression. |
810 the POSIX value for the empty string and a nullable regular expression. |
725 |
811 |
726 \begin{lemma}\label{lemmkeps} |
812 \begin{lemma}\label{lemmkeps} |
727 @{thm[mode=IfThen] Posix_mkeps} |
813 @{thm[mode=IfThen] Posix_mkeps} |