--- a/progs/scala/re-annotated2.sc Mon Feb 20 23:40:30 2023 +0000
+++ b/progs/scala/re-annotated2.sc Thu Feb 23 12:37:34 2023 +0000
@@ -223,6 +223,11 @@
case r => r
}
+def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match {
+ case Nil => r
+ case c::cs => bders_simp(bsimp(bder(c, r)), cs)
+}
+
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
case Nil => if (bnullable(r)) bmkeps(r)
else throw new Exception("Not matched")
@@ -368,3 +373,16 @@
val n = 200
println(s"lexing fib program ($n times, size ${prog2.length * n})")
println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n)))
+
+
+
+
+
+val reg2 = STAR("a" | "aa")
+
+println(bsize(bders_simp(internalise(reg2), ("a" * 0).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 1).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 2).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 3).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 4).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 5).toList)))
--- a/thys3/Paper.thy Mon Feb 20 23:40:30 2023 +0000
+++ b/thys3/Paper.thy Thu Feb 23 12:37:34 2023 +0000
@@ -260,8 +260,9 @@
expressions the \emph{bounded} regular expression @{term "NTIMES r
n"} where the @{term n} specifies that @{term r} should match
exactly @{term n}-times (it is not included in Sulzmann and Lu's original work). For brevity we omit the other bounded
- regular expressions @{text "r"}$^{\{..n\}}$, @{text "r"}$^{\{n..\}}$
- and @{text "r"}$^{\{n..m\}}$ which specify intervals for how many
+ regular expressions @{text "r"}$^{\{..\textit{n}\}}$,
+ @{text "r"}$^{\{\textit{n}..\}}$
+ and @{text "r"}$^{\{\textit{n}..\textit{m}\}}$ which specify intervals for how many
times @{text r} should match. The results presented in this paper
extend straightforwardly to them too. The importance of the bounded
regular expressions is that they are often used in practical
@@ -270,7 +271,7 @@
al~\cite{BjorklundMartensTimm2015}, bounded regular expressions
occur frequently in the latter and can have counters of up to
ten million. The problem is that tools based on the classic notion
- of automata need to expand @{text "r"}$^{\{n\}}$ into @{text n}
+ of automata need to expand @{text "r"}$^{\{\textit{n}\}}$ into @{text n}
connected copies of the automaton for @{text r}. This leads to very
inefficient matching algorithms or algorithms that consume large
amounts of memory. A classic example is the regular expression
@@ -397,9 +398,9 @@
%
\begin{center}
\begin{tabular}{@ {}l@ {}}
- @{thm[mode=Axiom] Prf.intros(4)}\quad
- @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\quad
- @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\quad
+ @{thm[mode=Axiom] Prf.intros(4)}\qquad
+ @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\qquad
+ @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\qquad
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\qquad
@{thm[mode=Rule] Prf.intros(6)[of "vs" "r"]}\qquad
@@ -542,7 +543,7 @@
a list of exactly @{term n} copies, which is the length of the list we expect in this
case. The injection function\footnote{While the character argument @{text c} is not
strictly necessary in the @{text inj}-function for the fragment of regular expressions we
- use in this paper, it is necessary for extended regular expressions, for example for the range regular expression of the form @{text "[a-z]"}.
+ use in this paper, it is necessary for extended regular expressions. For example for the range regular expression of the form @{text "[a-z]"}.
We therefore keep this argument from the original formulation of @{text inj} by Sulzmann and Lu.}
then calculates the corresponding value for each intermediate derivative until
a value for the original regular expression is generated.
@@ -1569,7 +1570,7 @@
the POSIX rules and we have not been able to
reconcile this problem. Therefore we leave better bounds for future work.%!\\[-6.5mm]
-Note that while Antimirov was able to give a bound on the \emph{size}
+Note also that while Antimirov was able to give a bound on the \emph{size}
of his partial derivatives~\cite{Antimirov95}, Brzozowski gave a bound
on the \emph{number} of derivatives, provided they are quotient via
ACI rules \cite{Brzozowski1964}. Brzozowski's result is crucial when one