# HG changeset patch # User Christian Urban # Date 1456929350 0 # Node ID 28972819316481854d6fa93761ede3205fe9fcdc # Parent 267afb7fb700250a2a644d5f717a6061790193ae updated diff -r 267afb7fb700 -r 289728193164 thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Wed Mar 02 14:06:13 2016 +0000 +++ b/thys/Paper/Paper.thy Wed Mar 02 14:35:50 2016 +0000 @@ -9,17 +9,21 @@ "der_syn r c \ der c r" notation (latex output) - If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and + If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and + ZERO ("\<^bold>0" 80) and ONE ("\<^bold>1" 80) and CHAR ("_" [1000] 80) and ALT ("_ + _" [77,77] 78) and SEQ ("_ \ _" [77,77] 78) and STAR ("_\<^sup>\" [1000] 78) and + + val.Void ("'(')" 78) and val.Char ("Char _" [1000] 78) and val.Left ("Left _" [1000] 78) and val.Right ("Right _" [1000] 78) and + L ("L'(_')" [10] 78) and der_syn ("_\\_" [79, 1000] 76) and flat ("|_|" [70] 73) and @@ -153,15 +157,14 @@ section {* Preliminaries *} -text {* \noindent Strings in Isabelle/HOL are lists of characters with - the empty string being represented by the empty list, written @{term - "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. - Often we use the usual bracket notation for strings; for example a - string consisting of a single character is written @{term "[c]"}. By - using the type @{type char} for characters we have a supply of - finitely many characters roughly corresponding to the ASCII - character set. Regular expressions are defined as usual as the - following inductive datatype: +text {* \noindent Strings in Isabelle/HOL are lists of characters with the +empty string being represented by the empty list, written @{term "[]"}, and +list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual +bracket notation for strings; for example a string consisting of just a +single character is written @{term "[c]"}. By using the type @{type char} +for characters we have a supply of finitely many characters roughly +corresponding to the ASCII character set. Regular expressions are defined as +usual as the following inductive datatype: \begin{center} @{text "r :="} @@ -249,6 +252,45 @@ section {* POSIX Regular Expression Matching *} +text {* + +The clever idea in \cite{Sulzmann2014} is to define a function on values that mirrors +(but inverts) the construction of the derivative on regular expressions. We +begin with the case of a nullable regular expression: from the nullability +we need to construct a value that witnesses the nullability. This is as +follows. The @{const mkeps} function (from \cite{Sulzmann2014}) is a partial (but +unambiguous) function from regular expressions to values, total on exactly +the set of nullable regular expressions. + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ + @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ + \end{tabular} + \end{center} + +The well-known idea of POSIX lexing is informally defined in (for example) +\cite{posix}; as correctly argued in \cite{Sulzmann2014}, this needs formal +specification. The rough idea is that, in contrast to the so-called GREEDY +algorithm, POSIX lexing chooses to match more deeply and using left choices +rather than a right choices. For example, note that to match the string +@{term "[a, b]"} with the regular expression $(a + \mts)\circ (b+ab)$ the matching +will return $( Void, Right(ab))$ rather than $(Left\ a, Left\ b)$. [The +regular expression $ab$ is short for $(Lit\ a) \circ (Lit\ b)$.] Similarly, +to match {\em ``a''} with $(a+a)$ the leftmost $a$ will be chosen. + +We use a simple inductive definition to specify this notion, incorporating +the POSIX-specific choices into the side-conditions for the rules $R tl ++_2$, $R tl\circ$ and $R tl*$ (as they are now called). By contrast, +\cite{Sulzmann2014} defines a relation between values and argues that there is a +maximum value, as given by the derivative-based algorithm yet to be spelt +out. The relation we define is ternary, relating strings, values and regular +expressions. + +*} + section {* The Argument by Sulzmmann and Lu *} section {* Conclusion *} @@ -278,20 +320,6 @@ @{term "Stars vs"} \end{center} - \noindent - The language of a regular expression - - \begin{center} - \begin{tabular}{lcl} - @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ - @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ - @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ - @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ - \end{tabular} - \end{center} - \noindent @@ -312,14 +340,7 @@ \noindent The @{const mkeps} function - \begin{center} - \begin{tabular}{lcl} - @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ - @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ - @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ - \end{tabular} - \end{center} + \noindent The @{text inj} function diff -r 267afb7fb700 -r 289728193164 thys/ReStar.thy --- a/thys/ReStar.thy Wed Mar 02 14:06:13 2016 +0000 +++ b/thys/ReStar.thy Wed Mar 02 14:35:50 2016 +0000 @@ -136,7 +136,7 @@ where "der c (ZERO) = ZERO" | "der c (ONE) = ZERO" -| "der c (CHAR c') = (if c = c' then ONE else ZERO)" +| "der c (CHAR d) = (if c = d then ONE else ZERO)" | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" | "der c (SEQ r1 r2) = (if nullable r1 @@ -399,7 +399,7 @@ apply(erule Prf.cases) apply(simp_all)[7] apply(simp) -apply(case_tac "c = c'") +apply(case_tac "c = d") apply(simp) apply(auto)[1] apply(erule Prf.cases) @@ -1168,7 +1168,7 @@ apply(erule PMatch.cases) apply(simp_all)[7] (* CHAR case *) -apply(case_tac "c = c'") +apply(case_tac "c = d") apply(simp) apply(erule PMatch.cases) apply(simp_all)[7] diff -r 267afb7fb700 -r 289728193164 thys/paper.pdf Binary file thys/paper.pdf has changed