lexing: changeset 402:1612f2a77bf6

--- a/thys2/Paper/Paper.thy	Sun Jan 30 01:03:26 2022 +0000
+++ b/thys2/Paper/Paper.thy	Sun Jan 30 21:21:24 2022 +0000
@@ -26,7 +26,43 @@
   CH ("_" [1000] 80) and
   ALT ("_ + _" [77,77] 78) and
   SEQ ("_ \<cdot> _" [77,77] 78) and
-  STAR ("_\<^sup>\<star>" [79] 78) 
+  STAR ("_\<^sup>\<star>" [79] 78) and
+
+  val.Void ("Empty" 78) and
+  val.Char ("Char _" [1000] 78) and
+  val.Left ("Left _" [79] 78) and
+  val.Right ("Right _" [1000] 78) and
+  val.Seq ("Seq _ _" [79,79] 78) and
+  val.Stars ("Stars _" [79] 78) and
+
+  Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
+
+  flat ("|_|" [75] 74) and
+  flats ("|_|" [72] 74) and
+  injval ("inj _ _ _" [79,77,79] 76) and 
+  mkeps ("mkeps _" [79] 76) and 
+  length ("len _" [73] 73) and
+  set ("_" [73] 73) and
+
+  AZERO ("ZERO" 81) and 
+  AONE ("ONE _" [79] 81) and 
+  ACHAR ("CHAR _ _" [79, 79] 80) and
+  AALTs ("ALTs _ _" [77,77] 78) and
+  ASEQ ("SEQ _ _ _" [79, 77,77] 78) and
+  ASTAR ("STAR _ _" [79, 79] 78) and
+
+  code ("code _" [79] 74) and
+  intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
+  erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
+  bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
+  bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80)
+
+
+lemma better_retrieve:
+   shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
+   and   "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
+  apply (metis list.exhaust retrieve.simps(4))
+  by (metis list.exhaust retrieve.simps(5))
 
 (*>*)
 
@@ -76,6 +112,31 @@
 longest match.
 
 
+\begin{center}
+\begin{tabular}{cc}
+  \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
+  @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\
+  @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\
+  @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\
+  @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\
+  & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\
+  & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\
+  % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}
+  \end{tabular}
+  &
+  \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
+  @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\
+  @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\
+  @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\
+  @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\
+  \end{tabular}  
+\end{tabular}  
+\end{center}
+
 
 \begin{figure}[t]
 \begin{center}
@@ -151,14 +212,90 @@
   \end{figure}
 
 
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
+  @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
+  @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
+  \end{tabular}
+  \end{center}
+
+  \begin{center}
+  \begin{tabular}{l@ {\hspace{5mm}}lcl}
+  \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\
+  \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & 
+      @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\
+  \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & 
+      @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
+  \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
+      & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
+  \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
+      & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
+  \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ 
+      & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
+  \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ 
+      & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\
+  \end{tabular}
+  \end{center}
+
 *}
 
-section {* Bitcoded Derivatives *}
+section {* Bitcoded Regular Expressions and Derivatives *}
 
 text {*
   bitcoded regexes / decoding / bmkeps
   gets rid of the second phase (only single phase)   
   correctness
+
+
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\
+  @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\
+  @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\
+  @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\
+  @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
+  @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\
+  @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)}
+  \end{tabular}
+  \end{center}
+
+
+  The idea of the bitcodes is to annotate them to regular expressions and generate values
+  incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
+
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{term breg} & $::=$ & @{term "AZERO"}\\
+               & $\mid$ & @{term "AONE bs"}\\
+               & $\mid$ & @{term "ACHAR bs c"}\\
+               & $\mid$ & @{term "AALTs bs rs"}\\
+               & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
+               & $\mid$ & @{term "ASTAR bs r"}
+  \end{tabular}
+  \end{center}
+
+
+
+  \begin{center}
+  \begin{tabular}{lcl}
+  @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
+  @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
+  @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
+  @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
+  @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\
+  @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}
+      & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\
+  @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
+  @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}
+  \end{tabular}
+  \end{center}
+
+
+  \begin{theorem}
+  @{thm blexer_correctness} 
+  \end{theorem}
 *}
 
 
@@ -166,31 +303,34 @@
 
 text {*
      Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates.
-
+  
    not direct correspondence with PDERs, because of example
    problem with retrieve 
 
    correctness
 
+   
+    
+
 
 
    \begin{figure}[t]
   \begin{center}
   \begin{tabular}{c}
-  @{thm[mode=Axiom] bs1}\qquad
-  @{thm[mode=Axiom] bs2}\qquad
-  @{thm[mode=Axiom] bs3}\\
-  @{thm[mode=Rule] bs4}\qquad
-  @{thm[mode=Rule] bs5}\\
-  @{thm[mode=Rule] bs6}\qquad
-  @{thm[mode=Rule] bs7}\\
-  @{thm[mode=Rule] bs8}\\
+  @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad
+  @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad
+  @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\
+  @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad
+  @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\
+  @{thm[mode=Axiom] bs6}\qquad
+  @{thm[mode=Axiom] bs7}\\
+  @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\
   @{thm[mode=Axiom] ss1}\qquad
-  @{thm[mode=Rule] ss2}\qquad
-  @{thm[mode=Rule] ss3}\\
+  @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad
+  @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\
   @{thm[mode=Axiom] ss4}\qquad
-  @{thm[mode=Axiom] ss5}\qquad
-  @{thm[mode=Rule] ss6}\\
+  @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\
+  @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\
   \end{tabular}
   \end{center}
   \caption{???}\label{SimpRewrites}

--- a/thys2/Paper/document/root.tex	Sun Jan 30 01:03:26 2022 +0000
+++ b/thys2/Paper/document/root.tex	Sun Jan 30 21:21:24 2022 +0000
@@ -63,20 +63,19 @@
   expressions.  Their algorithm generates POSIX values which encode
   the information of \emph{how} a regular expression matches a
   string---that is, which part of the string is matched by which part
-  of the regular expression.  The purpose of the bitcodes in Sulzmann
-  and Lu's algorithm is to generate POSIX values incrementally while
-  derivatives are calculated. However they also help with designing
-  `aggressive' simplification functions that keep the size of
+  of the regular expression.  The purpose of the bitcodes is to generate POSIX values incrementally while
+  derivatives are calculated. They also help with designing
+  an `aggressive' simplification function that keeps the size of
   derivatives small. Without simplification derivatives can grow
   exponentially resulting in an extremely slow lexing algorithm.  In this
   paper we describe a variant of Sulzmann and Lu's algorithm: Our
-  algorithm is a small, recursive functional program, whereas Sulzmann
+  algorithm is a recursive functional program, whereas Sulzmann
   and Lu's version involves a fixpoint construction. We \textit{(i)}
   prove in Isabelle/HOL that our program is correct and generates
   unique POSIX values; we also \textit{(ii)} establish a polynomial
   bound for the size of the derivatives. The size can be seen as a
-  proxy measure for the efficiency of the lexing algorithm---that means
-  because of the polynomial bound our algorithm does not suffer from
+  proxy measure for the efficiency of the lexing algorithm: because of
+  the polynomial bound our algorithm does not suffer from
   the exponential blowup in earlier works.
   
   % Brzozowski introduced the notion of derivatives for regular

--- a/thys2/SizeBound4.thy	Sun Jan 30 01:03:26 2022 +0000
+++ b/thys2/SizeBound4.thy	Sun Jan 30 21:21:24 2022 +0000
@@ -146,7 +146,7 @@
 fun retrieve :: "arexp \<Rightarrow> val \<Rightarrow> bit list" where
   "retrieve (AONE bs) Void = bs"
 | "retrieve (ACHAR bs c) (Char d) = bs"
-| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" 
+| "retrieve (AALTs bs [r]) v = bs @ retrieve r v"
 | "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
 | "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
 | "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2"
@@ -457,7 +457,7 @@
      (if (f x) \<in> acc then distinctBy xs f acc 
       else x # (distinctBy xs f ({f x} \<union> acc)))"
 
- 
+  
 
 fun flts :: "arexp list \<Rightarrow> arexp list"
   where

Binary file thys2/paper.pdf has changed

author	Christian Urban <christian.urban@kcl.ac.uk>
	Sun, 30 Jan 2022 21:21:24 +0000
changeset 402	1612f2a77bf6
parent 401	8bbe2468fedc
child 404	1500f96707b0

thys2/Paper/Paper.thy		file \| annotate \| diff \| comparison \| revisions
thys2/Paper/document/root.tex		file \| annotate \| diff \| comparison \| revisions
thys2/SizeBound4.thy		file \| annotate \| diff \| comparison \| revisions
thys2/paper.pdf		file \| annotate \| diff \| comparison \| revisions