updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Thu, 23 Feb 2023 12:37:34 +0000
changeset 644 9f984ff20020
parent 643 9580bae0500d
child 645 304a12cdda6f
updated
progs/scala/re-annotated2.sc
thys3/Paper.thy
--- a/progs/scala/re-annotated2.sc	Mon Feb 20 23:40:30 2023 +0000
+++ b/progs/scala/re-annotated2.sc	Thu Feb 23 12:37:34 2023 +0000
@@ -223,6 +223,11 @@
   case r => r
 }
 
+def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match {
+  case Nil => r
+  case c::cs => bders_simp(bsimp(bder(c, r)), cs)
+}
+
 def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
   case Nil => if (bnullable(r)) bmkeps(r) 
               else throw new Exception("Not matched")
@@ -368,3 +373,16 @@
 val n = 200
 println(s"lexing fib program ($n times, size ${prog2.length * n})")
 println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n)))
+
+
+
+
+
+val reg2 = STAR("a" | "aa")
+
+println(bsize(bders_simp(internalise(reg2), ("a" * 0).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 1).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 2).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 3).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 4).toList)))
+println(bsize(bders_simp(internalise(reg2), ("a" * 5).toList)))
--- a/thys3/Paper.thy	Mon Feb 20 23:40:30 2023 +0000
+++ b/thys3/Paper.thy	Thu Feb 23 12:37:34 2023 +0000
@@ -260,8 +260,9 @@
   expressions the \emph{bounded} regular expression @{term "NTIMES r
   n"} where the @{term n} specifies that @{term r} should match
   exactly @{term n}-times (it is not included in Sulzmann and Lu's original work). For brevity we omit the other bounded
-  regular expressions @{text "r"}$^{\{..n\}}$, @{text "r"}$^{\{n..\}}$
-  and @{text "r"}$^{\{n..m\}}$ which specify intervals for how many
+  regular expressions @{text "r"}$^{\{..\textit{n}\}}$,
+  @{text "r"}$^{\{\textit{n}..\}}$
+  and @{text "r"}$^{\{\textit{n}..\textit{m}\}}$ which specify intervals for how many
   times @{text r} should match. The results presented in this paper
   extend straightforwardly to them too. The importance of the bounded
   regular expressions is that they are often used in practical
@@ -270,7 +271,7 @@
   al~\cite{BjorklundMartensTimm2015}, bounded regular expressions 
   occur frequently in the latter and can have counters of up to
   ten million.  The problem is that tools based on the classic notion
-  of automata need to expand @{text "r"}$^{\{n\}}$ into @{text n}
+  of automata need to expand @{text "r"}$^{\{\textit{n}\}}$ into @{text n}
   connected copies of the automaton for @{text r}. This leads to very
   inefficient matching algorithms or algorithms that consume large
   amounts of memory.  A classic example is the regular expression
@@ -397,9 +398,9 @@
   %
   \begin{center}
   \begin{tabular}{@ {}l@ {}}
-  @{thm[mode=Axiom] Prf.intros(4)}\quad
-  @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\quad
-  @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\quad
+  @{thm[mode=Axiom] Prf.intros(4)}\qquad
+  @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\qquad
+  @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\qquad
   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\
   @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\qquad
   @{thm[mode=Rule] Prf.intros(6)[of "vs" "r"]}\qquad
@@ -542,7 +543,7 @@
   a list of exactly @{term n} copies, which is the length of the list we expect in this
   case.  The injection function\footnote{While the character argument @{text c} is not
   strictly necessary in the @{text inj}-function for the fragment of regular expressions we
-  use in this paper, it is necessary for extended regular expressions, for example for the range regular expression of the form @{text "[a-z]"}.
+  use in this paper, it is necessary for extended regular expressions. For example for the range regular expression of the form @{text "[a-z]"}.
   We therefore keep this argument from the original formulation of @{text inj} by Sulzmann and Lu.}
   then calculates the corresponding value for each intermediate derivative until
   a value for the original regular expression is generated.
@@ -1569,7 +1570,7 @@
 the POSIX rules and we have not been able to
 reconcile this problem. Therefore we leave better bounds for future work.%!\\[-6.5mm]
 
-Note that while Antimirov was able to give a bound on the \emph{size}
+Note also that while Antimirov was able to give a bound on the \emph{size}
 of his partial derivatives~\cite{Antimirov95}, Brzozowski gave a bound
 on the \emph{number} of derivatives, provided they are quotient via
 ACI rules \cite{Brzozowski1964}. Brzozowski's result is crucial when one