--- a/thys/Paper/Paper.thy Wed Mar 02 14:06:13 2016 +0000
+++ b/thys/Paper/Paper.thy Wed Mar 02 14:35:50 2016 +0000
@@ -9,17 +9,21 @@
"der_syn r c \<equiv> der c r"
notation (latex output)
- If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
+ If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and
+
ZERO ("\<^bold>0" 80) and
ONE ("\<^bold>1" 80) and
CHAR ("_" [1000] 80) and
ALT ("_ + _" [77,77] 78) and
SEQ ("_ \<cdot> _" [77,77] 78) and
STAR ("_\<^sup>\<star>" [1000] 78) and
+
+ val.Void ("'(')" 78) and
val.Char ("Char _" [1000] 78) and
val.Left ("Left _" [1000] 78) and
val.Right ("Right _" [1000] 78) and
+
L ("L'(_')" [10] 78) and
der_syn ("_\\_" [79, 1000] 76) and
flat ("|_|" [70] 73) and
@@ -153,15 +157,14 @@
section {* Preliminaries *}
-text {* \noindent Strings in Isabelle/HOL are lists of characters with
- the empty string being represented by the empty list, written @{term
- "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}.
- Often we use the usual bracket notation for strings; for example a
- string consisting of a single character is written @{term "[c]"}. By
- using the type @{type char} for characters we have a supply of
- finitely many characters roughly corresponding to the ASCII
- character set. Regular expressions are defined as usual as the
- following inductive datatype:
+text {* \noindent Strings in Isabelle/HOL are lists of characters with the
+empty string being represented by the empty list, written @{term "[]"}, and
+list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual
+bracket notation for strings; for example a string consisting of just a
+single character is written @{term "[c]"}. By using the type @{type char}
+for characters we have a supply of finitely many characters roughly
+corresponding to the ASCII character set. Regular expressions are defined as
+usual as the following inductive datatype:
\begin{center}
@{text "r :="}
@@ -249,6 +252,45 @@
section {* POSIX Regular Expression Matching *}
+text {*
+
+The clever idea in \cite{Sulzmann2014} is to define a function on values that mirrors
+(but inverts) the construction of the derivative on regular expressions. We
+begin with the case of a nullable regular expression: from the nullability
+we need to construct a value that witnesses the nullability. This is as
+follows. The @{const mkeps} function (from \cite{Sulzmann2014}) is a partial (but
+unambiguous) function from regular expressions to values, total on exactly
+the set of nullable regular expressions.
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
+ @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
+ @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
+ \end{tabular}
+ \end{center}
+
+The well-known idea of POSIX lexing is informally defined in (for example)
+\cite{posix}; as correctly argued in \cite{Sulzmann2014}, this needs formal
+specification. The rough idea is that, in contrast to the so-called GREEDY
+algorithm, POSIX lexing chooses to match more deeply and using left choices
+rather than a right choices. For example, note that to match the string
+@{term "[a, b]"} with the regular expression $(a + \mts)\circ (b+ab)$ the matching
+will return $( Void, Right(ab))$ rather than $(Left\ a, Left\ b)$. [The
+regular expression $ab$ is short for $(Lit\ a) \circ (Lit\ b)$.] Similarly,
+to match {\em ``a''} with $(a+a)$ the leftmost $a$ will be chosen.
+
+We use a simple inductive definition to specify this notion, incorporating
+the POSIX-specific choices into the side-conditions for the rules $R tl
++_2$, $R tl\circ$ and $R tl*$ (as they are now called). By contrast,
+\cite{Sulzmann2014} defines a relation between values and argues that there is a
+maximum value, as given by the derivative-based algorithm yet to be spelt
+out. The relation we define is ternary, relating strings, values and regular
+expressions.
+
+*}
+
section {* The Argument by Sulzmmann and Lu *}
section {* Conclusion *}
@@ -278,20 +320,6 @@
@{term "Stars vs"}
\end{center}
- \noindent
- The language of a regular expression
-
- \begin{center}
- \begin{tabular}{lcl}
- @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\
- @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\
- @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\
- @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\
- @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\
- @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\
- \end{tabular}
- \end{center}
-
\noindent
@@ -312,14 +340,7 @@
\noindent
The @{const mkeps} function
- \begin{center}
- \begin{tabular}{lcl}
- @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
- @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
- @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
- @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
- \end{tabular}
- \end{center}
+
\noindent
The @{text inj} function
--- a/thys/ReStar.thy Wed Mar 02 14:06:13 2016 +0000
+++ b/thys/ReStar.thy Wed Mar 02 14:35:50 2016 +0000
@@ -136,7 +136,7 @@
where
"der c (ZERO) = ZERO"
| "der c (ONE) = ZERO"
-| "der c (CHAR c') = (if c = c' then ONE else ZERO)"
+| "der c (CHAR d) = (if c = d then ONE else ZERO)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) =
(if nullable r1
@@ -399,7 +399,7 @@
apply(erule Prf.cases)
apply(simp_all)[7]
apply(simp)
-apply(case_tac "c = c'")
+apply(case_tac "c = d")
apply(simp)
apply(auto)[1]
apply(erule Prf.cases)
@@ -1168,7 +1168,7 @@
apply(erule PMatch.cases)
apply(simp_all)[7]
(* CHAR case *)
-apply(case_tac "c = c'")
+apply(case_tac "c = d")
apply(simp)
apply(erule PMatch.cases)
apply(simp_all)[7]
Binary file thys/paper.pdf has changed