| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sat, 10 Sep 2022 22:30:22 +0100 | |
| changeset 599 | a5f666410101 | 
| parent 578 | e71a6e2aca2d | 
| child 615 | 8881a09a06fd | 
| permissions | -rw-r--r-- | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1 | (*<*) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 2 | theory Paper | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 3 | imports | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 4 | "Posix.LexerSimp" | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 5 | "Posix.FBound" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 6 | "HOL-Library.LaTeXsugar" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 7 | begin | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 8 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 9 | declare [[show_question_marks = false]] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 10 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 11 | notation (latex output) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 12 |   If  ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 13 |   Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 14 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 15 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 16 | abbreviation | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 17 | "der_syn r c \<equiv> der c r" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 18 | abbreviation | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 19 | "ders_syn r s \<equiv> ders s r" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 20 | abbreviation | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 21 | "bder_syn r c \<equiv> bder c r" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 22 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 23 | notation (latex output) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 24 |   der_syn ("_\\_" [79, 1000] 76) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 25 |   ders_syn ("_\\_" [79, 1000] 76) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 26 |   bder_syn ("_\\_" [79, 1000] 76) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 27 |   bders ("_\\_" [79, 1000] 76) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 28 |   bders_simp ("_\\\<^sub>b\<^sub>s\<^sub>i\<^sub>m\<^sub>p _" [79, 1000] 76) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 29 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 30 |   ZERO ("\<^bold>0" 81) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 31 |   ONE ("\<^bold>1" 81) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 32 |   CH ("_" [1000] 80) and
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 33 |   ALT ("_ + _" [76,76] 77) and
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 34 |   SEQ ("_ \<cdot> _" [78,78] 78) and
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 35 |   STAR ("_\<^sup>*" [79] 78) and
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 36 |   NTIMES ("_\<^sup>{\<^bsup>_\<^esup>\<^sup>}" [79] 78) and
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 37 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 38 |   val.Void ("Empty" 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 39 |   val.Char ("Char _" [1000] 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 40 |   val.Left ("Left _" [79] 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 41 |   val.Right ("Right _" [1000] 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 42 |   val.Seq ("Seq _ _" [79,79] 78) and
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 43 |   val.Stars ("Stars _" [1000] 78) and
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 44 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 45 |   Prf ("\<turnstile> _ : _" [75,75] 75) and  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 46 |   Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 47 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 48 |   flat ("|_|" [75] 74) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 49 |   flats ("|_|" [72] 74) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 50 |   injval ("inj _ _ _" [79,77,79] 76) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 51 |   mkeps ("mkeps _" [79] 76) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 52 |   length ("len _" [73] 73) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 53 |   set ("_" [73] 73) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 54 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 55 |   AZERO ("ZERO" 81) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 56 |   AONE ("ONE _" [79] 78) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 57 |   ACHAR ("CHAR _ _" [79, 79] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 58 |   AALTs ("ALTs _ _" [77,77] 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 59 |   ASEQ ("SEQ _ _ _" [79, 79,79] 78) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 60 |   ASTAR ("STAR _ _" [79, 79] 78) and
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 61 |   ANTIMES ("NT _ _ _" [79, 79, 79] 78) and
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 62 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 63 |   code ("code _" [79] 74) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 64 |   intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 65 |   erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 66 |   bnullable ("bnullable _" [1000] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 67 |   bsimp_AALTs ("bsimpALT _ _" [10,1000] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 68 |   bsimp_ASEQ ("bsimpSEQ _ _ _" [10,1000,1000] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 69 |   bmkeps ("bmkeps _" [1000] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 70 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 71 |   eq1 ("_ \<approx> _" [77,77] 76) and 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 72 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 73 |   srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 74 |   rrewrites ("_ \<^latex>\<open>\\mbox{$\\,\\leadsto^*$}\<close> _" [71, 71] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 75 |   srewrites ("_ \<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}^*$}\<close> _" [71, 71] 80) and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 76 |   blexer_simp ("blexer\<^sup>+" 1000) 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 77 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 78 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 79 | lemma better_retrieve: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 80 | shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 81 | and "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 82 | apply (metis list.exhaust retrieve.simps(4)) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 83 | by (metis list.exhaust retrieve.simps(5)) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 84 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 85 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 86 | lemma better_retrieve2: | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 87 | shows "retrieve (ANTIMES bs r (n + 1)) (Stars (v#vs)) = | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 88 | bs @ [Z] @ retrieve r v @ retrieve (ANTIMES [] r n) (Stars vs)" | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 89 | by auto | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 90 | (*>*) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 91 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 92 | section {* Introduction *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 93 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 94 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 95 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 96 | In the last fifteen or so years, Brzozowski's derivatives of regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 97 | expressions have sparked quite a bit of interest in the functional | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 98 | programming and theorem prover communities. | 
| 563 
c92a41d9c4da
updated paper and theories to include n-times
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
502diff
changeset | 99 | Derivatives of a | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 100 | regular expression, written @{term "der c r"}, give a simple solution
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 101 | to the problem of matching a string @{term s} with a regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 102 | expression @{term r}: if the derivative of @{term r} w.r.t.\ (in
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 103 | succession) all the characters of the string matches the empty string, | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 104 | then @{term r} matches @{term s} (and {\em vice versa}).  
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 105 | The beauty of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 106 | Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 107 | expressible in any functional language, and easily definable and | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 108 | reasoned about in theorem provers---the definitions just consist of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 109 | inductive datatypes and simple recursive functions. Another attractive | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 110 | feature of derivatives is that they can be easily extended to \emph{bounded}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 111 | regular expressions, such as @{term "r"}$^{\{@{text n}\}}$ or @{term r}$^{\{..@{text n}\}}$, where numbers or
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 112 | intervals of numbers specify how many times a regular expression should be used | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 113 | during matching. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 114 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 115 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 116 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 117 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 118 | However, there are two difficulties with derivative-based matchers: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 119 | First, Brzozowski's original matcher only generates a yes/no answer | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 120 | for whether a regular expression matches a string or not. This is too | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 121 | little information in the context of lexing where separate tokens must | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 122 | be identified and also classified (for example as keywords | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 123 | or identifiers).  Sulzmann and Lu~\cite{Sulzmann2014} overcome this
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 124 | difficulty by cleverly extending Brzozowski's matching | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 125 | algorithm. Their extended version generates additional information on | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 126 | \emph{how} a regular expression matches a string following the POSIX
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 127 | rules for regular expression matching. They achieve this by adding a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 128 | second ``phase'' to Brzozowski's algorithm involving an injection | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 129 | function. In our own earlier work we provided the formal | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 130 | specification of what POSIX matching means and proved in Isabelle/HOL | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 131 | the correctness | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 132 | of Sulzmann and Lu's extended algorithm accordingly | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 133 | \cite{AusafDyckhoffUrban2016}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 134 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 135 | The second difficulty is that Brzozowski's derivatives can | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 136 | grow to arbitrarily big sizes. For example if we start with the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 137 | regular expression \mbox{@{text "(a + aa)\<^sup>*"}} and take
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 138 | successive derivatives according to the character $a$, we end up with | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 139 | a sequence of ever-growing derivatives like | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 140 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 141 | \def\ll{\stackrel{\_\backslash{} a}{\longrightarrow}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 142 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 143 | \begin{tabular}{rll}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 144 | $(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 145 | & $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 146 | & $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 147 | & & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 148 | & $\ll$ & \ldots \hspace{15mm}(regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 149 | \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 150 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 151 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 152 | \noindent where after around 35 steps we run out of memory on a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 153 | typical computer (we shall define shortly the precise details of our | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 154 | regular expressions and the derivative operation). Clearly, the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 155 | notation involving $\ZERO$s and $\ONE$s already suggests | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 156 | simplification rules that can be applied to regular regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 157 | expressions, for example $\ZERO{}\,r \Rightarrow \ZERO$, $\ONE{}\,r
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 158 | \Rightarrow r$, $\ZERO{} + r \Rightarrow r$ and $r + r \Rightarrow
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 159 | r$. While such simple-minded simplifications have been proved in our | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 160 | earlier work to preserve the correctness of Sulzmann and Lu's | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 161 | algorithm \cite{AusafDyckhoffUrban2016}, they unfortunately do
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 162 | \emph{not} help with limiting the growth of the derivatives shown
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 163 | above: the growth is slowed, but some derivatives can still grow rather | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 164 | quickly beyond any finite bound. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 165 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 166 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 167 | Sulzmann and Lu address this ``growth problem'' in a second algorithm | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 168 | \cite{Sulzmann2014} where they introduce bitcoded
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 169 | regular expressions. In this version, POSIX values are | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 170 | represented as bitsequences and such sequences are incrementally generated | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 171 | when derivatives are calculated. The compact representation | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 172 | of bitsequences and regular expressions allows them to define a more | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 173 | ``aggressive'' simplification method that keeps the size of the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 174 | derivatives finitely bounded no matter what the length of the string is. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 175 | They make some informal claims about the correctness and linear behaviour | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 176 | of this version, but do not provide any supporting proof arguments, not | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 177 | even ``pencil-and-paper'' arguments. They write about their bitcoded | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 178 | \emph{incremental parsing method} (that is the algorithm to be formalised
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 179 | in this paper): | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 180 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 181 | \begin{quote}\it
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 182 | ``Correctness Claim: We further claim that the incremental parsing | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 183 | method [..] in combination with the simplification steps [..] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 184 | yields POSIX parse trees. We have tested this claim | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 185 | extensively [..] but yet | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 186 |   have to work out all proof details.'' \cite[Page 14]{Sulzmann2014}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 187 | \end{quote}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 188 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 189 | \noindent{}\textbf{Contributions:} We have implemented in Isabelle/HOL
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 190 | the derivative-based lexing algorithm of Sulzmann and Lu | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 191 | \cite{Sulzmann2014} where regular expressions are annotated with
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 192 | bitsequences. We define the crucial simplification function as a | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 193 | recursive function, without the need of a fixpoint operation. One objective of | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 194 | the simplification function is to remove duplicates of regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 195 | expressions. For this Sulzmann and Lu use in their paper the standard | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 196 | @{text nub} function from Haskell's list library, but this function
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 197 | does not achieve the intended objective with bitcoded regular expressions. The | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 198 | reason is that in the bitcoded setting, each copy generally has a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 199 | different bitcode annotation---so @{text nub} would never ``fire''.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 200 | Inspired by Scala's library for lists, we shall instead use a @{text
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 201 | distinctWith} function that finds duplicates under an ``erasing'' function | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 202 | which deletes bitcodes before comparing regular expressions. | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 203 | We shall also introduce our \emph{own} argument and definitions for
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 204 | establishing the correctness of the bitcoded algorithm when | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 205 | simplifications are included. Finally we | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 206 | establish that the size of derivatives can be finitely bounded.\medskip%\footnote{ In this paper, we shall first briefly introduce the basic notions
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 207 | %of regular expressions and describe the definition | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 208 | %of POSIX lexing from our earlier work \cite{AusafDyckhoffUrban2016}. This serves
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 209 | %as a reference point for what correctness means in our Isabelle/HOL proofs. We shall then prove | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 210 | %the correctness for the bitcoded algorithm without simplification, and | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 211 | %after that extend the proof to include simplification.}\smallskip | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 212 | %\mbox{}\\[-10mm]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 213 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 214 | \noindent In this paper, we shall first briefly introduce the basic notions | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 215 | of regular expressions and describe the definition | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 216 | of POSIX lexing from our earlier work \cite{AusafDyckhoffUrban2016}. This serves
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 217 | as a reference point for what correctness means in our Isabelle/HOL proofs. We shall then prove | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 218 | the correctness for the bitcoded algorithm without simplification, and | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 219 | after that extend the proof to include simplification.\mbox{}\\[-6mm]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 220 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 221 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 222 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 223 | section {* Background *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 224 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 225 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 226 | In our Isabelle/HOL formalisation strings are lists of characters with | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 227 | the empty string being represented by the empty list, written $[]$, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 228 | and list-cons being written as $\_\!::\!\_\,$; string | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 229 | concatenation is $\_ \,@\, \_\,$. We often use the usual | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 230 | bracket notation for lists also for strings; for example a string | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 231 | consisting of just a single character $c$ is written $[c]$. | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 232 | Our regular expressions are defined as the following inductive | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 233 | datatype:\\[-4mm] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 234 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 235 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 236 |   @{text "r ::="} \;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 237 |   @{const "ZERO"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 238 |   @{const "ONE"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 239 |   @{term "CH c"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 240 |   @{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 241 |   @{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$
 | 
| 563 
c92a41d9c4da
updated paper and theories to include n-times
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
502diff
changeset | 242 |   @{term "STAR r"} $\mid$
 | 
| 
c92a41d9c4da
updated paper and theories to include n-times
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
502diff
changeset | 243 |   @{term "NTIMES r n"}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 244 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 245 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 246 |   \noindent where @{const ZERO} stands for the regular expression that
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 247 |   does not match any string, @{const ONE} for the regular expression
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 248 |   that matches only the empty string and @{term c} for matching a
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 249 | character literal. The constructors $+$ and $\cdot$ represent | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 250 | alternatives and sequences, respectively. We sometimes omit the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 251 | $\cdot$ in a sequence regular expression for brevity. The | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 252 |   \emph{language} of a regular expression, written $L(r)$, is defined
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 253 | as usual and we omit giving the definition here (see for example | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 254 |   \cite{AusafDyckhoffUrban2016}).
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 255 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 256 | In our work here we also add to the usual ``basic'' regular | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 257 |   expressions the \emph{bounded} regular expression @{term "NTIMES r
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 258 |   n"} where the @{term n} specifies that @{term r} should match
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 259 |   exactly @{term n}-times. Again for brevity we omit the other bounded
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 260 |   regular expressions @{text "r"}$^{\{..n\}}$, @{text "r"}$^{\{n..\}}$
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 261 |   and @{text "r"}$^{\{n..m\}}$ which specify intervals for how many
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 262 |   times @{text r} should match. The results presented in this paper
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 263 | extend straightforwardly to them too. The importance of the bounded | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 264 | regular expressions is that they are often used in practical | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 265 | applications, such as Snort (a system for detecting network | 
| 599 | 266 |   intrusions) and also in XML Schema definitions. According to Bj\"{o}rklund et
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 267 |   al~\cite{BjorklundMartensTimm2015}, bounded regular expressions 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 268 | occur frequently in the latter and can have counters of up to | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 269 | ten million. The problem is that tools based on the classic notion | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 270 |   of automata need to expand @{text "r"}$^{\{n\}}$ into @{text n}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 271 |   connected copies of the automaton for @{text r}. This leads to very
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 272 | inefficient matching algorithms or algorithms that consume large | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 273 | amounts of memory. A classic example is the regular expression | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 274 |   \mbox{@{term "SEQ (SEQ (STAR (ALT a b)) a) (NTIMES (ALT a b) n)"}}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 275 |   where the minimal DFA requires at least $2^{n + 1}$ states (see
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 276 |   \cite{CountingSet2020}). Therefore regular expression matching
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 277 | libraries that rely on the classic notion of DFAs often impose | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 278 | adhoc limits for bounded regular expressions: For example in the | 
| 571 
a76458dd9e4c
added paper about counting automata
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
569diff
changeset | 279 | regular expression matching library in the Go language and also in Google's RE2 library the regular expression | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 280 |   @{term "NTIMES a 1001"} is not permitted, because no counter can be
 | 
| 571 
a76458dd9e4c
added paper about counting automata
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
569diff
changeset | 281 | above 1000; and in the built-in regular expression library in Rust | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 282 |   expressions such as @{text "a\<^bsup>{1000}{100}{5}\<^esup>"} give an error
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 283 | message for being too big. These problems can of course be solved in matching | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 284 | algorithms where automata go beyond the classic notion and for | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 285 |   instance include explicit counters (see~\cite{CountingSet2020}).
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 286 | The point here is that Brzozowski derivatives and the algorithms by | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 287 | Sulzmann and Lu can be straightforwardly extended to deal with | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 288 | bounded regular expressions and moreover the resulting code | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 289 | still consists of only simple recursive functions and inductive | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 290 | datatypes. Finally, bounded regular expressions | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 291 | do not destroy our finite boundedness property, which we shall | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 292 | prove later on.%, because during the lexing process counters will only be | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 293 | %decremented. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 294 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 295 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 296 | Central to Brzozowski's regular expression matcher are two functions | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 297 |   called @{text nullable} and \emph{derivative}. The latter is written
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 298 | $r\backslash c$ for the derivative of the regular expression $r$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 299 | w.r.t.~the character $c$. Both functions are defined by recursion over | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 300 | regular expressions. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 301 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 302 | \begin{center}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 303 | \begin{tabular}{c @ {\hspace{-1mm}}c}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 304 |   \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 305 |   @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 306 |   @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 307 |   @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 308 |   @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 309 |   @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 310 |   & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 311 |   & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 312 |   % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 313 |   @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 314 |   @{thm (lhs) der.simps(7)} & $\dn$ & @{thm (rhs) der.simps(7)}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 315 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 316 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 317 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 318 |   @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 319 |   @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 320 |   @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 321 |   @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 322 |   @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 323 |   @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 324 |   @{thm (lhs) nullable.simps(7)} & $\dn$ & @{text "if"} @{text "n = 0"}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 325 |   & & @{text "then"} @{const "True"}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 326 |   & & @{text "else"} @{term "nullable r"}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 327 |   \end{tabular}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 328 | \end{tabular}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 329 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 330 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 331 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 332 | We can extend this definition to give derivatives w.r.t.~strings, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 333 |   namely @{thm (lhs) ders.simps(1)}$\dn$@{thm (rhs) ders.simps(1)}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 334 |   and @{thm (lhs) ders.simps(2)}$\dn$@{thm (rhs) ders.simps(2)}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 335 |   Using @{text nullable} and the derivative operation, we can
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 336 | define a simple regular expression matcher, namely | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 337 | $@{text "match s r"} \;\dn\; @{term nullable}(r\backslash s)$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 338 | This is essentially Brzozowski's algorithm from 1964. Its | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 339 | main virtue is that the algorithm can be easily implemented as a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 340 | functional program (either in a functional programming language or in | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 341 | a theorem prover). The correctness of @{text match} amounts to
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 342 | establishing the property:%\footnote{It is a fun exercise to formally prove this property in a theorem prover.}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 343 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 344 | \begin{proposition}\label{matchcorr} 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 345 | @{text "match s r"} \;\;\text{if and only if}\;\; $s \in L(r)$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 346 | \end{proposition}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 347 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 348 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 349 | It is a fun exercise to formally prove this property in a theorem prover. | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 350 | We are aware | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 351 | of a mechanised correctness proof of Brzozowski's derivative-based matcher in HOL4 by | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 352 | Owens and Slind~\cite{Owens2008}. Another one in Isabelle/HOL is part
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 353 | of the work by Krauss and Nipkow~\cite{Krauss2011}.  And another one
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 354 | in Coq is given by Coquand and Siles \cite{Coquand2012}.
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 355 | Also Ribeiro and Du Bois give one in Agda~\cite{RibeiroAgda2017}.
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 356 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 357 | The novel idea of Sulzmann and Lu is to extend this algorithm for | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 358 | lexing, where it is important to find out which part of the string | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 359 | is matched by which part of the regular expression. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 360 | For this Sulzmann and Lu presented two lexing algorithms in their paper | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 361 |   \cite{Sulzmann2014}. The first algorithm consists of two phases: first a
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 362 | matching phase (which is Brzozowski's algorithm) and then a value | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 363 |   construction phase. The values encode \emph{how} a regular expression
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 364 |   matches a string. \emph{Values} are defined as the inductive datatype
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 365 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 366 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 367 |   @{text "v ::="}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 368 |   @{const "Void"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 369 |   @{term "val.Char c"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 370 |   @{term "Left v"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 371 |   @{term "Right v"} $\mid$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 372 |   @{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 373 |   @{term "Stars vs"} 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 374 |   \end{center}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 375 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 376 |   \noindent where we use @{term vs} to stand for a list of values. The
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 377 |   string underlying a value can be calculated by a @{const flat}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 378 |   function, written @{term "flat DUMMY"}. It traverses a value and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 379 |   collects the characters contained in it \cite{AusafDyckhoffUrban2016}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 380 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 381 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 382 | Sulzmann and Lu also define inductively an | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 383 | inhabitation relation that associates values to regular expressions. Our | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 384 | version of this relation is defined the following six rules for the values: | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 385 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 386 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 387 |   \begin{tabular}{@ {}c@ {}}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 388 |   @{thm[mode=Axiom] Prf.intros(4)}\qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 389 |   @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 390 |   @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 391 |   @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 392 |   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 393 |   @{thm[mode=Rule] Prf.intros(6)[of "vs" "r"]}\qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 394 |   $\mprset{flushleft}\inferrule{
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 395 |   @{thm (prem 1) Prf.intros(7)[of "vs\<^sub>1" "r"  "vs\<^sub>2" "n"]}\\\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 396 |   @{thm (prem 2) Prf.intros(7)[of "vs\<^sub>1" "r"  "vs\<^sub>2" "n"]}\quad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 397 |   @{thm (prem 3) Prf.intros(7)[of "vs\<^sub>1" "r"  "vs\<^sub>2" "n"]}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 398 | } | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 399 |   {@{thm (concl) Prf.intros(7)[of "vs\<^sub>1" "r"  "vs\<^sub>2" "n"]}}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 400 | $ | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 401 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 402 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 403 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 404 | \noindent Note that no values are associated with the regular expression | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 405 |   @{term ZERO}, since it cannot match any string. Interesting is our version
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 406 |   of the rule for @{term "STAR r"} where we require that each value
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 407 |   in  @{term vs} flattens to a non-empty string. This means if @{term "STAR r"} ``fires''
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 408 | one or more times, then each copy needs to match a non-empty string. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 409 | Similarly, in the rule | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 410 |   for @{term "NTIMES r n"} we require that the length of the list
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 411 |   @{term "vs\<^sub>1 @ vs\<^sub>2"} equals @{term n} (meaning the regular expression
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 412 |   @{text r} matches @{text n}-times) and that the first segment of this list
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 413 | contains values that flatten to non-empty strings followed by a segment that | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 414 | only contains values that flatten to the empty string. | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 415 | It is routine to establish how values ``inhabiting'' a regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 416 | expression correspond to the language of a regular expression, namely | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 417 |   @{thm L_flat_Prf}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 418 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 419 | In general there is more than one value inhabited by a regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 420 | expression (meaning regular expressions can typically match more | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 421 | than one string). But even when fixing a string from the language of the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 422 | regular expression, there are generally more than one way of how the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 423 | regular expression can match this string. POSIX lexing is about | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 424 | identifying the unique value for a given regular expression and a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 425 | string that satisfies the informal POSIX rules (see | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 426 |   \cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}).
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 427 |   %\footnote{POSIX
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 428 | % lexing acquired its name from the fact that the corresponding | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 429 | % rules were described as part of the POSIX specification for | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 430 |   %	Unix-like operating systems \cite{POSIX}.}
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 431 | Sometimes these | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 432 |   informal rules are called \emph{maximal munch rule} and \emph{rule priority}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 433 | One contribution of our earlier paper is to give a convenient | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 434 | specification for what POSIX values are (the inductive rules are shown in | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 435 |   Figure~\ref{POSIXrules}).
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 436 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 437 | \begin{figure}[t]
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 438 |   \begin{center}\small%
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 439 |   \begin{tabular}{@ {\hspace{-2mm}}c@ {}}
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 440 | \\[-9mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 441 |   @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 442 |   @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 443 |   @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 444 |   @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 445 |   $\mprset{flushleft}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 446 | \inferrule | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 447 |    {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 448 |     @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 449 |     @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 450 |    {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\medskip\smallskip\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 451 |   @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\qquad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 452 |   $\mprset{flushleft}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 453 | \inferrule | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 454 |    {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 455 |     @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 456 |     @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 457 |     @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 458 |    {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>\medskip\smallskip\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 459 |   \mprset{sep=4mm} 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 460 |   @{thm[mode=Rule] Posix.intros(9)}\<open>Pn[]\<close>\quad 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 461 |   $\mprset{flushleft}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 462 | \inferrule | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 463 |    {@{thm (prem 1) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 464 |     @{thm (prem 2) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \qquad
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 465 |     @{thm (prem 3) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \\\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 466 |     @{thm (prem 4) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]}}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 467 |    {@{thm (concl) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]}}$\<open>Pn+\<close>
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 468 | \\[-4mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 469 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 470 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 471 |   \caption{The inductive definition of POSIX values taken from our earlier paper \cite{AusafDyckhoffUrban2016}. The ternary relation, written $(s, r) \rightarrow v$, formalises the notion
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 472 | of given a string $s$ and a regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 473 | expression $r$ what is the unique value $v$ that satisfies the informal POSIX constraints for | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 474 |   regular expression matching.\smallskip}\label{POSIXrules}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 475 |   \end{figure}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 476 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 477 |   The clever idea by Sulzmann and Lu \cite{Sulzmann2014} in their first algorithm is to define
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 478 | an injection function on values that mirrors (but inverts) the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 479 | construction of the derivative on regular expressions. Essentially it | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 480 | injects back a character into a value. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 481 |   For this they define two functions called @{text mkeps} and @{text inj}:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 482 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 483 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 484 |   \begin{tabular}{@ {}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 485 |   \begin{tabular}{@ {}lcl@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 486 |   @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 487 |   @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 488 |   @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 489 |   @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 490 |   @{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 491 |   \end{tabular}\medskip\\
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 492 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 493 |   \begin{tabular}{@ {}cc@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 494 |   \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 495 |   @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 496 |   @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 497 |       @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 498 |   @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 499 |       @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 500 |   @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ 
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 501 |       & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 502 |   @{thm (lhs) injval.simps(8)[of "r" "n" "c" "v" "vs"]} & $\dn$ 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 503 |       & @{thm (rhs) injval.simps(8)[of "r" "n" "c" "v" "vs"]}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 504 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 505 | & | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 506 |   \begin{tabular}{@ {\hspace{-3mm}}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}@ {}}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 507 |   @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 508 |       & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 509 |   @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 510 |       & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 511 |   @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$\\ 
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 512 |   \multicolumn{3}{@ {}r@ {}}{@{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}}\\ \mbox{}\\ \mbox{}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 513 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 514 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 515 |   \end{tabular}\smallskip
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 516 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 517 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 518 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 519 |   The function @{text mkeps} is run when the last derivative is nullable, that is
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 520 | the string to be matched is in the language of the regular expression. It generates | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 521 | a value for how the last derivative can match the empty string. In case | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 522 |   of @{term "NTIMES r n"} we use the function @{term replicate} in order to generate
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 523 |   a list of exactly @{term n} copies, which is the length of the list we expect in this
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 524 | case. The injection function | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 525 | then calculates the corresponding value for each intermediate derivative until | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 526 | a value for the original regular expression is generated. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 527 | Graphically the algorithm by | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 528 |   Sulzmann and Lu can be illustrated by the following picture %in Figure~\ref{Sulz}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 529 |   where the path from the left to the right involving @{term derivatives}/@{const
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 530 | nullable} is the first phase of the algorithm (calculating successive | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 531 |   \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 532 | left, the second phase. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 533 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 534 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 535 | \begin{tikzpicture}[scale=0.99,node distance=9mm,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 536 |                     every node/.style={minimum size=6mm}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 537 | \node (r1)  {@{term "r\<^sub>1"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 538 | \node (r2) [right=of r1]{@{term "r\<^sub>2"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 539 | \draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 540 | \node (r3) [right=of r2]{@{term "r\<^sub>3"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 541 | \draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 542 | \node (r4) [right=of r3]{@{term "r\<^sub>4"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 543 | \draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 544 | \draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 545 | \node (v4) [below=of r4]{@{term "v\<^sub>4"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 546 | \draw[->,line width=1mm](r4) -- (v4); | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 547 | \node (v3) [left=of v4] {@{term "v\<^sub>3"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 548 | \draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 549 | \node (v2) [left=of v3]{@{term "v\<^sub>2"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 550 | \draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 551 | \node (v1) [left=of v2] {@{term "v\<^sub>1"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 552 | \draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 553 | \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 554 | \end{tikzpicture}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 555 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 556 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 557 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 558 | The picture shows the steps required when a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 559 |   regular expression, say @{text "r\<^sub>1"}, matches the string @{term
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 560 | "[a,b,c]"}. The first lexing algorithm by Sulzmann and Lu can be defined as:%\\[-8mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 561 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 562 | %  \begin{figure}[t]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 563 | %\begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 564 | %\begin{tikzpicture}[scale=1,node distance=1cm,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 565 | %                    every node/.style={minimum size=6mm}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 566 | %\node (r1)  {@{term "r\<^sub>1"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 567 | %\node (r2) [right=of r1]{@{term "r\<^sub>2"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 568 | %\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 569 | %\node (r3) [right=of r2]{@{term "r\<^sub>3"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 570 | %\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 571 | %\node (r4) [right=of r3]{@{term "r\<^sub>4"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 572 | %\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 573 | %\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 574 | %\node (v4) [below=of r4]{@{term "v\<^sub>4"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 575 | %\draw[->,line width=1mm](r4) -- (v4); | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 576 | %\node (v3) [left=of v4] {@{term "v\<^sub>3"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 577 | %\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 578 | %\node (v2) [left=of v3]{@{term "v\<^sub>2"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 579 | %\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 580 | %\node (v1) [left=of v2] {@{term "v\<^sub>1"}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 581 | %\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 582 | %\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 583 | %\end{tikzpicture}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 584 | %\end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 585 | %\mbox{}\\[-13mm]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 586 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 587 | %\caption{The two phases of the first algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 588 | %matching the string @{term "[a,b,c]"}. The first phase (the arrows from 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 589 | %left to right) is \Brz's matcher building successive derivatives. If the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 590 | %last regular expression is @{term nullable}, then the functions of the 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 591 | %second phase are called (the top-down and right-to-left arrows): first | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 592 | %@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 593 | %how the empty string has been recognised by @{term "r\<^sub>4"}. After
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 594 | %that the function @{term inj} ``injects back'' the characters of the string into
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 595 | %the values. The value @{term "v\<^sub>1"} is the result of the algorithm representing
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 596 | %the POSIX value for this string and | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 597 | %regular expression. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 598 | %\label{Sulz}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 599 | %\end{figure} 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 600 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 601 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 602 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 603 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 604 |   \begin{tabular}{lcl}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 605 |   @{thm (lhs) lexer.simps(1)} & $\dn$ & @{thm (rhs) lexer.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 606 |   @{thm (lhs) lexer.simps(2)} & $\dn$ & @{text "case"} @{term "lexer (der c r) s"} @{text of}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 607 |                      @{term "None"}  @{text "\<Rightarrow>"} @{term None}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 608 |                      & & \hspace{24mm}$|\;$ @{term "Some v"} @{text "\<Rightarrow>"} @{term "Some (injval r c v)"}                          
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 609 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 610 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 611 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 612 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 613 | We have shown in our earlier paper \cite{AusafDyckhoffUrban2016} that
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 614 | this algorithm is correct, that is it generates POSIX values. The | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 615 | central property we established relates the derivative operation to the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 616 | injection function. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 617 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 618 |   \begin{proposition}\label{Posix2}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 619 | 	\textit{If} $(s,\; r\backslash c) \rightarrow v$ \textit{then} $(c :: s,\; r) \rightarrow$ \textit{inj} $r\; c\; v$. 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 620 | \end{proposition}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 621 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 622 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 623 | With this in place we were able to prove: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 624 | |
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 625 |   \begin{proposition}\mbox{}\label{lexercorrect}
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 626 |   \textrm{(1)} @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}.\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 627 |   \mbox{\hspace{29mm}}\textrm{(2)}\; @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}.
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 628 | % | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 629 | % \smallskip\\ | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 630 |   %\begin{tabular}{ll}
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 631 |   %(1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 632 |   %(2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 633 |   %\end{tabular}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 634 |   \end{proposition}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 635 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 636 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 637 | In fact we have shown that, in the success case, the generated POSIX value $v$ is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 638 | unique and in the failure case that there is no POSIX value $v$ that satisfies | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 639 | $(s, r) \rightarrow v$. While the algorithm is correct, it is excruciatingly | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 640 | slow in cases where the derivatives grow arbitrarily (recall the example from the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 641 | Introduction). However it can be used as a convenient reference point for the correctness | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 642 | proof of the second algorithm by Sulzmann and Lu, which we shall describe next. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 643 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 644 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 645 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 646 | section {* Bitcoded Regular Expressions and Derivatives *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 647 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 648 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 649 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 650 |   In the second part of their paper \cite{Sulzmann2014},
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 651 | Sulzmann and Lu describe another algorithm that also generates POSIX | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 652 | values but dispenses with the second phase where characters are | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 653 | injected ``back'' into values. For this they annotate bitcodes to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 654 | regular expressions, which we define in Isabelle/HOL as the datatype | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 655 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 656 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 657 |   @{term breg} $\;::=\;$ @{term "AZERO"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 658 |                $\;\mid\;$ @{term "AONE bs"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 659 |                $\;\mid\;$ @{term "ACHAR bs c"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 660 |                $\;\mid\;$ @{term "AALTs bs rs"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 661 |                $\;\mid\;$ @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 662 |                $\;\mid\;$ @{term "ASTAR bs r"}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 663 | 	       $\;\mid\;$ @{term "ANTIMES bs r n"}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 664 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 665 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 666 |   \noindent where @{text bs} stands for bitsequences; @{text r},
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 667 |   @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 668 |   expressions; and @{text rs} for lists of bitcoded regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 669 |   expressions. The binary alternative @{text "ALT bs r\<^sub>1 r\<^sub>2"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 670 |   is just an abbreviation for \mbox{@{text "ALTs bs [r\<^sub>1, r\<^sub>2]"}}. 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 671 | For bitsequences we use lists made up of the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 672 |   constants @{text Z} and @{text S}.  The idea with bitcoded regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 673 | expressions is to incrementally generate the value information (for | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 674 |   example @{text Left} and @{text Right}) as bitsequences. For this 
 | 
| 578 
e71a6e2aca2d
updated paper and literature
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
571diff
changeset | 675 |   Sulzmann and Lu follow Nielsen and Henglein \cite{NielsenHenglein2011}
 | 
| 
e71a6e2aca2d
updated paper and literature
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
571diff
changeset | 676 | and define a coding | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 677 | function for how values can be coded into bitsequences. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 678 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 679 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 680 |   \begin{tabular}{cc}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 681 |   \begin{tabular}{lcl}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 682 |   @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 683 |   @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 684 |   @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 685 |   @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 686 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 687 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 688 |   \begin{tabular}{lcl}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 689 |   @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 690 |   @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 691 |   @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 692 |   \mbox{\phantom{XX}}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 693 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 694 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 695 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 696 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 697 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 698 | As can be seen, this coding is ``lossy'' in the sense that we do not | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 699 | record explicitly character values and also not sequence values (for | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 700 | them we just append two bitsequences). However, the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 701 |   different alternatives for @{text Left}, respectively @{text Right}, are recorded as @{text Z} and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 702 |   @{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 703 |   if there is still a value coming in the list of @{text Stars}, whereas @{text S}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 704 | indicates the end of the list. The lossiness makes the process of | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 705 | decoding a bit more involved, but the point is that if we have a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 706 |   regular expression \emph{and} a bitsequence of a corresponding value,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 707 |   then we can always decode the value accurately (see Fig.~\ref{decode}).
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 708 |   The function \textit{decode} checks whether all of the bitsequence is
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 709 |   consumed and returns the corresponding value as @{term "Some v"}; otherwise
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 710 |   it fails with @{text "None"}. We can establish that for a value $v$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 711 | inhabited by a regular expression $r$, the decoding of its | 
| 578 
e71a6e2aca2d
updated paper and literature
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
571diff
changeset | 712 |   bitsequence never fails (see also \cite{NielsenHenglein2011}).
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 713 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 714 | %The decoding can be | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 715 |   %defined by using two functions called $\textit{decode}'$ and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 716 |   %\textit{decode}:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 717 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 718 |   \begin{figure}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 719 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 720 |   \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}}
 | 
| 599 | 721 |   $\textit{decode}'\,bs\,(\ONE)$ & $\;\dn\;$ & $(\Empty, bs)$\\
 | 
| 722 |   $\textit{decode}'\,bs\,(c)$ & $\;\dn\;$ & $(\Char\,c, bs)$\\
 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 723 |   $\textit{decode}'\,(\Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 724 |      $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 725 | (\Left\,v, bs_1)$\\ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 726 |   $\textit{decode}'\,(\S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 727 |      $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 728 | (\Right\,v, bs_1)$\\ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 729 |   $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 730 |         $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 731 |   & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 732 |         \hspace{2mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 733 |   $\textit{decode}'\,(\S\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 734 |   $\textit{decode}'\,(\Z\!::\!bs)\,(r^*)$ & $\dn$ & 
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 735 |          $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 736 |   & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 737 |         \hspace{2mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\\
 | 
| 599 | 738 |   $\textit{decode}'\,bs\,(r^{\{n\}})$ & $\dn$ & $\textit{decode}'\,bs\,r^*$\smallskip\medskip\\
 | 
| 739 | ||
| 740 |   $\textit{decode}\,bs\,r$ & $\dn$ & 
 | |
| 741 |      $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
 | |
| 742 |   & & $\;\;\;\,\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;\textit{else}\;\textit{None}$\\[-4mm]   
 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 743 |   \end{tabular}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 744 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 745 |   \caption{Two functions, called $\textit{decode}'$ and \textit{decode}, for decoding a value from a bitsequence with the help of a regular expression.\\[-5mm]}\label{decode}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 746 |   \end{figure}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 747 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 748 | %\noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 749 |   %The function \textit{decode} checks whether all of the bitsequence is
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 750 |   %consumed and returns the corresponding value as @{term "Some v"}; otherwise
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 751 |   %it fails with @{text "None"}. We can establish that for a value $v$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 752 | %inhabited by a regular expression $r$, the decoding of its | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 753 | %bitsequence never fails. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 754 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 755 | \begin{lemma}\label{codedecode}\it
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 756 | If $\;\vdash v : r$ then | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 757 |   $\;\textit{decode}\,(\textit{code}\, v)\,r = \textit{Some}\, v$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 758 | \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 759 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 760 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 761 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 762 |   Sulzmann and Lu define the function \emph{internalise}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 763 | in order to transform (standard) regular expressions into annotated | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 764 | regular expressions. We write this operation as $r^\uparrow$. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 765 | This internalisation uses the following | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 766 |   \emph{fuse} function.	     
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 767 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 768 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 769 |   \begin{tabular}{@ {}cc@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 770 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 771 |   $\textit{fuse}\,bs\,(\textit{ZERO})$ & $\dn$ & $\textit{ZERO}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 772 |   $\textit{fuse}\,bs\,(\textit{ONE}\,bs')$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 773 |      $\textit{ONE}\,(bs\,@\,bs')$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 774 |   $\textit{fuse}\,bs\,(\textit{CHAR}\,bs'\,c)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 775 |      $\textit{CHAR}\,(bs\,@\,bs')\,c$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 776 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 777 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 778 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 779 |   $\textit{fuse}\,bs\,(\textit{ALTs}\,bs'\,rs)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 780 |      $\textit{ALTs}\,(bs\,@\,bs')\,rs$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 781 |   $\textit{fuse}\,bs\,(\textit{SEQ}\,bs'\,r_1\,r_2)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 782 |      $\textit{SEQ}\,(bs\,@\,bs')\,r_1\,r_2$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 783 |   $\textit{fuse}\,bs\,(\textit{STAR}\,bs'\,r)$ & $\dn$ &
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 784 |      $\textit{STAR}\,(bs\,@\,bs')\,r$\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 785 |   $\textit{fuse}\,bs\,(\textit{NT}\,bs'\,r\,n)$ & $\dn$ &
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 786 |      $\textit{NT}\,(bs\,@\,bs')\,r\,n$   
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 787 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 788 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 789 |   \end{center}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 790 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 791 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 792 |   A regular expression can then be \emph{internalised} into a bitcoded
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 793 | regular expression as follows: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 794 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 795 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 796 |   \begin{tabular}{@ {}ccc@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 797 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 798 |   $(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 799 |   $(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 800 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 801 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 802 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 803 |   $(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 804 |   $(r^*)^\uparrow$ & $\dn$ & $\textit{STAR}\;[]\,r^\uparrow$\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 805 |   $(r^{\{n\}})^\uparrow$ & $\dn$ &
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 806 |         $\textit{NT}\;[]\,r^\uparrow\,n$
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 807 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 808 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 809 |   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 810 | $(r_1 + r_2)^\uparrow$ & $\dn$ & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 811 |          $\textit{ALT}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 812 |                             (\textit{fuse}\,[\S]\,r_2^\uparrow)$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 813 | $(r_1\cdot r_2)^\uparrow$ & $\dn$ & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 814 |          $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 815 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 816 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 817 |   \end{center}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 818 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 819 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 820 |   There is also an \emph{erase}-function, written $r^\downarrow$, which
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 821 | transforms a bitcoded regular expression into a (standard) regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 822 | expression by just erasing the annotated bitsequences. We omit the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 823 | straightforward definition. For defining the algorithm, we also need | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 824 |   the functions \textit{bnullable} and \textit{bmkeps}(\textit{s}), which are the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 825 |   ``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 826 | bitcoded regular expressions. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 827 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 828 |   \begin{center}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 829 |   \begin{tabular}{@ {\hspace{-1mm}}c@ {\hspace{0mm}}c@ {}}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 830 |   \begin{tabular}{@ {}l@ {\hspace{0.5mm}}c@ {\hspace{1mm}}l}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 831 |   $\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{False}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 832 |   $\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{True}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 833 |   $\textit{bnullable}\,(\textit{CHAR}\,bs\,c)$ & $\dn$ & $\textit{False}$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 834 |   $\textit{bnullable}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 835 |      $\exists\, r \in \rs. \,\textit{bnullable}\,r$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 836 |   $\textit{bnullable}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 837 |      $\textit{bnullable}\,r_1\wedge \textit{bnullable}\,r_2$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 838 |   $\textit{bnullable}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 839 |      $\textit{True}$\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 840 |   $\textit{bnullable}\,(\textit{NT}\,bs\,r\,n)$ & $\dn$ &\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 841 |   \multicolumn{3}{l}{$\textit{if}\;n = 0\;\textit{then}\;\textit{True}\;
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 842 |   \textit{else}\;\textit{bnullable}\,r$}\\
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 843 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 844 | & | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 845 |   \begin{tabular}{@ {}l@ {\hspace{0.5mm}}c@ {\hspace{1mm}}l@ {}}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 846 |   $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 847 |   $\textit{bmkeps}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 848 |   $bs\,@\,\textit{bmkepss}\,\rs$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 849 |   $\textit{bmkeps}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 850 |   \multicolumn{3}{l}{$bs \,@\,\textit{bmkeps}\,r_1\,@\, \textit{bmkeps}\,r_2$}\\
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 851 |   $\textit{bmkeps}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 852 | $bs \,@\, [\S]$\\ | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 853 |    $\textit{bmkeps}\,(\textit{NT}\,bs\,r\,n)$ & $\dn$ &\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 854 |    \multicolumn{3}{l@ {}}{$\textit{if}\;n=0\;\textit{then}\;bs \,@\, [\S]$}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 855 |    \multicolumn{3}{l@ {}}{$\textit{else}\,bs \,@\, [\Z] \,@\, \textit{bmkeps}\,r\,@\,
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 856 |         \textit{bmkeps}\,(@{term "ANTIMES [] r (n - 1)"})$}\\   
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 857 |   $\textit{bmkepss}\,(r\!::\!\rs)$ & $\dn$ &\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 858 |   \multicolumn{3}{l}{$\textit{if}\;\textit{bnullable}\,r\,\textit{then}\;\textit{bmkeps}\,r\;
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 859 |   \textit{else}\;\textit{bmkepss}\,\rs$}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 860 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 861 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 862 |   \end{center}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 863 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 864 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 865 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 866 | The key function in the bitcoded algorithm is the derivative of a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 867 | bitcoded regular expression. This derivative function calculates the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 868 | derivative but at the same time also the incremental part of the bitsequences | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 869 | that contribute to constructing a POSIX value. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 870 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 871 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 872 |   \begin{tabular}{@ {}lcl@ {}}
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 873 |   $(\textit{ZERO})\backslash c$ & $\;\dn\;$ & $\textit{ZERO}$\\ 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 874 |   $(\textit{ONE}\;bs)\backslash c$ & $\;\dn\;$ & $\textit{ZERO}$\\  
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 875 |   $(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 876 |         $\textit{if}\;c=d\; \;\textit{then}\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 877 |          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 878 |   $(\textit{ALTs}\;bs\,\rs)\backslash c$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 879 |         $\textit{ALTs}\,bs\,(\mathit{map}\,(\_\backslash c)\,\rs)$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 880 |   $(\textit{SEQ}\;bs\,r_1\,r_2)\backslash c$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 881 |      $\textit{if}\;\textit{bnullable}\,r_1$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 882 |   & &$\textit{then}\;\textit{ALT}\,bs\;(\textit{SEQ}\,[]\,(r_1\backslash c)\,r_2)$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 883 |   $\;(\textit{fuse}\,(\textit{bmkeps}\,r_1)\,(r_2\backslash c))$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 884 |   & &$\textit{else}\;\textit{SEQ}\,bs\,(r_1\backslash c)\,r_2$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 885 |   $(\textit{STAR}\,bs\,r)\backslash c$ & $\dn$ &
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 886 |       $\textit{SEQ}\;(bs\,@\,[\Z])\,(r\backslash c)\,
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 887 |        (\textit{STAR}\,[]\,r)$\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 888 |   $(\textit{NT}\,bs\,r\,n)\backslash c$ & $\dn$ &
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 889 |       $\textit{if}\;n = 0\; \textit{then}\;\textit{ZERO}\;\textit{else}\;
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 890 |       \textit{SEQ}\;(bs\,@\,[\Z])\,(r\backslash c)\,
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 891 |        (\textit{NT}\,[]\,r\,(n - 1))$     
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 892 |   \end{tabular}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 893 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 894 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 895 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 896 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 897 | This function can also be extended to strings, written $r\backslash s$, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 898 | just like the standard derivative. We omit the details. Finally we | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 899 |   can define Sulzmann and Lu's bitcoded lexer, which we call \textit{blexer}:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 900 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 901 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 902 | \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 903 |   $\textit{blexer}\;r\,s$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 904 |       $\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$                
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 905 |   $\;\;\textit{if}\; \textit{bnullable}(r_{der}) \;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 906 |        \;\;\textit{else}\;\textit{None}$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 907 | \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 908 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 909 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 910 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 911 | This bitcoded lexer first internalises the regular expression $r$ and then | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 912 | builds the bitcoded derivative according to $s$. If the derivative is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 913 | (b)nullable the string is in the language of $r$ and it extracts the bitsequence using the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 914 | $\textit{bmkeps}$ function. Finally it decodes the bitsequence into a value.  If
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 915 | the derivative is \emph{not} nullable, then $\textit{None}$ is
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 916 | returned. We can show that this way of calculating a value | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 917 | generates the same result as \textit{lexer}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 918 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 919 | Before we can proceed we need to define a helper function, called | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 920 | \textit{retrieve}, which Sulzmann and Lu introduced for the correctness proof.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 921 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 922 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 923 |   \begin{tabular}{lcl}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 924 |   @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 925 |   @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 926 |   @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 927 |   @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 928 |   @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 929 |   @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 930 |       & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 931 |   @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 932 |   @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 933 |   @{thm (lhs) retrieve.simps(9)} & $\dn$ & @{thm (rhs) retrieve.simps(9)}\\
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 934 |   @{thm (lhs) better_retrieve2} & $\dn$ & @{thm (rhs) better_retrieve2}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 935 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 936 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 937 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 938 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 939 | The idea behind this function is to retrieve a possibly partial | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 940 | bitsequence from a bitcoded regular expression, where the retrieval is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 941 | guided by a value. For example if the value is $\Left$ then we | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 942 | descend into the left-hand side of an alternative in order to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 943 | assemble the bitcode. Similarly for | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 944 | $\Right$. The property we can show is that for a given $v$ and $r$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 945 | with $\vdash v : r$, the retrieved bitsequence from the internalised | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 946 | regular expression is equal to the bitcoded version of $v$. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 947 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 948 | \begin{lemma}\label{retrievecode}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 949 | If $\vdash v : r$ then $\textit{code}\, v = \textit{retrieve}\,(r^\uparrow)\,v$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 950 | \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 951 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 952 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 953 | We also need some auxiliary facts about how the bitcoded operations | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 954 | relate to the ``standard'' operations on regular expressions. For | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 955 | example if we build a bitcoded derivative and erase the result, this | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 956 | is the same as if we first erase the bitcoded regular expression and | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 957 | then perform the ``standard'' derivative operation. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 958 | |
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 959 | \begin{lemma}\label{bnullable}%\mbox{}\smallskip\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 960 | \textit{(1)} $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\    
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 961 | \mbox{\hspace{22mm}}\textit{(2)} $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 962 | \mbox{\hspace{22mm}}\textit{(3)} $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 963 | %\begin{tabular}{ll}
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 964 | %\textit{(1)} & $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\    
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 965 | %\textit{(2)} & $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 966 | %\textit{(3)} & $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$.
 | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 967 | %\end{tabular}  
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 968 | \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 969 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 970 | %\begin{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 971 | % All properties are by induction on annotated regular expressions. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 972 | % %There are no interesting cases. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 973 | %\end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 974 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 975 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 976 | The only difficulty left for the correctness proof is that the bitcoded algorithm | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 977 | has only a ``forward phase'' where POSIX values are generated incrementally. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 978 | We can achieve the same effect with @{text lexer} (which has two phases) by stacking up injection
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 979 | functions during the forward phase. An auxiliary function, called $\textit{flex}$,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 980 | allows us to recast the rules of $\lexer$ in terms of a single | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 981 | phase and stacked up injection functions. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 982 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 983 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 984 | \begin{tabular}{@ {}l@ {}c@ {}l@ {\hspace{10mm}}l@ {}c@ {}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 985 |   $\textit{flex}\;r\,f\,[]$ & $\dn$ & $f$ & 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 986 |   $\textit{flex}\;r\,f\,(c\!::\!s)$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 987 |   $\textit{flex}\,(r\backslash c)\,(\lambda v.\,f\,(\inj\,r\,c\,v))\,s$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 988 | \end{tabular}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 989 | \end{center}    
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 990 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 991 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 992 | The point of this function is that when | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 993 | reaching the end of the string, we just need to apply the stacked up | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 994 | injection functions to the value generated by @{text mkeps}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 995 | Using this function we can recast the success case in @{text lexer} 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 996 | as follows: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 997 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 998 | \begin{lemma}\label{flex}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 999 | If @{text "lexer r s = Some v"} \;then\; @{text "v = "}$\,\textit{flex}\,r\,id\,s\,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1000 | (\mkeps (r\backslash s))$. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1001 | \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1002 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1003 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1004 | Note we did not redefine \textit{lexer}, we just established that the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1005 | value generated by \textit{lexer} can also be obtained by a different
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1006 | method. While this different method is not efficient (we essentially | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1007 | need to traverse the string $s$ twice, once for building the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1008 | derivative $r\backslash s$ and another time for stacking up injection | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1009 | functions), it helps us with proving | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1010 | that incrementally building up values in @{text blexer} generates the same result.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1011 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1012 | This brings us to our main lemma in this section: if we calculate a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1013 | derivative, say $r\backslash s$, and have a value, say $v$, inhabited | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1014 | by this derivative, then we can produce the result @{text lexer} generates
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1015 | by applying this value to the stacked-up injection functions | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1016 | that $\textit{flex}$ assembles. The lemma establishes that this is the same
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1017 | value as if we build the annotated derivative $r^\uparrow\backslash s$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1018 | and then retrieve the corresponding bitcoded version, followed by a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1019 | decoding step. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1020 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1021 | \begin{lemma}[Main Lemma]\label{mainlemma}\it
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1022 | If $\vdash v : r\backslash s$ then | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1023 | $\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,v) =
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1024 |   \textit{decode}(\textit{retrieve}\,(r^\uparrow \backslash s)\,v)\,r$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1025 | \end{lemma}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1026 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1027 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1028 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1029 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1030 | %With this lemma in place, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1031 | We can then prove the correctness of \textit{blexer}---it indeed
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1032 | produces the same result as \textit{lexer}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1033 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1034 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1035 | \begin{theorem}\label{thmone}
 | 
| 599 | 1036 | $\textit{blexer}\,r\,s = \textit{lexer}\,r\,s$
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1037 | \end{theorem}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1038 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1039 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1040 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1041 | \noindent This establishes that the bitcoded algorithm \emph{without}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1042 | simplification produces correct results. This was | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1043 | only conjectured by Sulzmann and Lu in their paper | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1044 | \cite{Sulzmann2014}. The next step is to add simplifications.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1045 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1046 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1047 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1048 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1049 | section {* Simplification *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1050 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1051 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1052 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1053 | Derivatives as calculated by Brzozowski's method are usually more | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1054 | complex regular expressions than the initial one; the result is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1055 | that derivative-based matching and lexing algorithms are | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1056 | often abysmally slow if the ``growth problem'' is not addressed. As Sulzmann and Lu wrote, various | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1057 | optimisations are possible, such as the simplifications | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1058 |      $\ZERO{}\,r \Rightarrow \ZERO$, $\ONE{}\,r \Rightarrow r$,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1059 |      $\ZERO{} + r \Rightarrow r$ and $r + r \Rightarrow r$. While these
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1060 | simplifications can considerably speed up the two algorithms in many | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1061 | cases, they do not solve fundamentally the growth problem with | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1062 | derivatives. To see this let us return to the example from the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1063 |      Introduction that shows the derivatives for \mbox{@{text "(a + aa)\<^sup>*"}}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1064 |      If we delete in the 3rd step all $\ZERO{}s$ and $\ONE$s according to
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1065 | the simplification rules shown above we obtain | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1066 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1067 | %% | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1068 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1069 |      \begin{equation}\def\xll{\xrightarrow{\_\backslash{} [a, a, a]}}\label{derivex}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1070 | (a + aa)^* \quad\xll\quad | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1071 |       \underbrace{\mbox{$(\ONE + a) \cdot (a + aa)^*$}}_{r} \;+\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1072 |      ((a + aa)^* + \underbrace{\mbox{$(\ONE + a) \cdot (a + aa)^*$}}_{r})
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1073 |      \end{equation}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1074 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1075 | \noindent This is a simpler derivative, but unfortunately we | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1076 | cannot make any further simplifications. This is a problem because | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1077 | the outermost alternatives contains two copies of the same | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1078 | regular expression (underlined with $r$). These copies will | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1079 | spawn new copies in later derivative steps and they in turn even more copies. This | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1080 | destroys any hope of taming the size of the derivatives. But the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1081 |      second copy of $r$ in \eqref{derivex} will never contribute to a
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1082 | value, because POSIX lexing will always prefer matching a string | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1083 | with the first copy. So it could be safely removed without affecting the correctness of the algorithm. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1084 | The dilemma with the simple-minded | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1085 | simplification rules above is that the rule $r + r \Rightarrow r$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1086 | will never be applicable because as can be seen in this example the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1087 | regular expressions are not next to each other but separated by another regular expression. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1088 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1089 | But here is where Sulzmann and Lu's representation of generalised | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1090 |      alternatives in the bitcoded algorithm shines: in @{term
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1091 | "ALTs bs rs"} we can define a more aggressive simplification by | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1092 |      recursively simplifying all regular expressions in @{text rs} and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1093 | then analyse the resulting list and remove any duplicates. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1094 | Another advantage with the bitsequences in bitcoded regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1095 | expressions is that they can be easily modified such that simplification does not | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1096 | interfere with the value constructions. For example we can ``flatten'', or | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1097 |      de-nest, or spill out, @{text ALTs} as follows
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1098 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1099 | \[ | 
| 599 | 1100 |      @{term "ALTs bs\<^sub>1 (((ALTs bs\<^sub>2 rs\<^sub>2)) # rs\<^sub>1)"}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1101 |      \quad\xrightarrow{bsimp}\quad
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1102 |      @{term "ALTs bs\<^sub>1 ((map (fuse bs\<^sub>2) rs\<^sub>2) @ rs\<^sub>1)"}
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1103 | \] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1104 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1105 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1106 |      where we just need to fuse the bitsequence that has accumulated in @{text "bs\<^sub>2"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1107 |      to the alternatives in @{text "rs\<^sub>2"}. As we shall show below this will
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1108 | ensure that the correct value corresponding to the original (unsimplified) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1109 | regular expression can still be extracted. %In this way the value construction | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1110 | %is not affected by simplification. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1111 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1112 | However there is one problem with the definition for the more | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1113 | aggressive simplification rules described by Sulzmann and Lu. Recasting | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1114 | their definition with our syntax they define the step of removing | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1115 | duplicates as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1116 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1117 |      \[ @{text "bsimp (ALTs bs rs)"} \dn @{text "ALTs
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1118 | bs (nub (map bsimp rs))"} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1119 | \] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1120 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1121 | \noindent where they first recursively simplify the regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1122 |      expressions in @{text rs} (using @{text map}) and then use
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1123 |      Haskell's @{text nub}-function to remove potential
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1124 | duplicates. While this makes sense when considering the example | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1125 |      shown in \eqref{derivex}, @{text nub} is the inappropriate
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1126 | function in the case of bitcoded regular expressions. The reason | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1127 |      is that in general the elements in @{text rs} will have a
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1128 |      different annotated bitsequence and in this way @{text nub}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1129 | will never find a duplicate to be removed. One correct way to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1130 |      handle this situation is to first \emph{erase} the regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1131 | expressions when comparing potential duplicates. This is inspired | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1132 |      by Scala's list functions of the form \mbox{@{text "distinctWith rs eq
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1133 |      acc"}} where @{text eq} is an user-defined equivalence relation that
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1134 |      compares two elements in @{text rs}. We define this function in
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1135 | Isabelle/HOL as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1136 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1137 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1138 |      \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1139 |      @{thm (lhs) distinctWith.simps(1)} & $\dn$ & @{thm (rhs) distinctWith.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1140 |      @{thm (lhs) distinctWith.simps(2)} & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1141 |      @{text "if (\<exists> y \<in> acc. eq x y)"} & @{text "then distinctWith xs eq acc"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1142 |      & & & @{text "else x :: distinctWith xs eq ({x} \<union> acc)"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1143 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1144 |      \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1145 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1146 | \noindent where we scan the list from left to right (because we | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1147 |      have to remove later copies). In @{text distinctWith}, @{text eq} is intended to be an
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1148 |      equivalence relation for annotated regular expressions and @{text acc} is an accumulator for annotated regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1149 | expressions---essentially a set of regular expressions that we have already seen | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1150 |      while scanning the list. Therefore we delete an element, say @{text x},
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1151 |      from the list provided a @{text "y"} with @{text "y"} being equivalent to @{text x} is already in the accumulator;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1152 |      otherwise we keep @{text x} and scan the rest of the list but 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1153 |      add @{text "x"} as another ``seen'' element to @{text acc}. We will use
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1154 |      @{term distinctWith} where @{text eq} is an equivalence that deletes bitsequences from bitcoded regular expressions
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1155 | before comparing the components. One way to define this in Isabelle/HOL is by the following recursive function from | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1156 |      annotated regular expressions to @{text bool}:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1157 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1158 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1159 |      \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1160 |      @{thm (lhs) eq1.simps(1)} & $\dn$ & @{thm (rhs) eq1.simps(1)}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1161 |      @{thm (lhs) eq1.simps(2)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(2)[of DUMMY DUMMY]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1162 |      @{thm (lhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]} & $\dn$ & @{thm (rhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1163 |      @{thm (lhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]} & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1164 |      @{thm (rhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1165 |      @{thm (lhs) eq1.simps(5)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(5)[of DUMMY DUMMY]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1166 |      @{thm (lhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]} & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1167 |      @{thm (rhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1168 |      @{thm (lhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]}\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1169 |      @{thm (lhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]}\\
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1170 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1171 |      \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1172 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1173 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1174 |      where all other cases are set to @{text False}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1175 |      This equivalence is clearly a computationally more expensive operation than @{text nub},
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1176 | but is needed in order to make the removal of unnecessary copies | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1177 | to work properly. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1178 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1179 | Our simplification function depends on three more helper functions, one is called | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1180 |      @{text flts} and analyses lists of regular expressions coming from alternatives.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1181 | It is defined as follows: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1182 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1183 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1184 |      \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1185 |      \multicolumn{3}{@ {}c}{@{thm (lhs) flts.simps(1)} $\dn$ @{thm (rhs) flts.simps(1)} \qquad\qquad\qquad\qquad
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1186 |      @{thm (lhs) flts.simps(2)} $\dn$ @{thm (rhs) flts.simps(2)}}\smallskip\\
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1187 |      @{thm (lhs) flts.simps(3)[of "bs'" "rs'"]} & $\dn$ & @{thm (rhs) flts.simps(3)[of "bs'" "rs'"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1188 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1189 |      \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1190 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1191 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1192 |      The second clause of @{text flts} removes all instances of @{text ZERO} in alternatives and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1193 | the third ``de-nests'' alternatives (but retaining the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1194 |      bitsequence @{text "bs'"} accumulated in the inner alternative). There are
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1195 | some corner cases to be considered when the resulting list inside an alternative is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1196 | empty or a singleton list. We take care of those cases in the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1197 |      @{text "bsimpALTs"} function; similarly we define a helper function that simplifies
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1198 |      sequences according to the usual rules about @{text ZERO}s and @{text ONE}s:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1199 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1200 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1201 |      \begin{tabular}{c@ {\hspace{5mm}}c}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1202 |      \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1203 |      @{text "bsimpALTs bs []"}  & $\dn$ & @{text "ZERO"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1204 |      @{text "bsimpALTs bs [r]"} & $\dn$ & @{text "fuse bs r"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1205 |      @{text "bsimpALTs bs rs"}  & $\dn$ & @{text "ALTs bs rs"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1206 |      \mbox{}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1207 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1208 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1209 |      \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1210 |      @{text "bsimpSEQ bs _ ZERO"}  & $\dn$ & @{text "ZERO"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1211 |      @{text "bsimpSEQ bs ZERO _"} & $\dn$ & @{text "ZERO"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1212 |      @{text "bsimpSEQ bs\<^sub>1 (ONE bs\<^sub>2) r\<^sub>2"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1213 |          & $\dn$ & @{text "fuse (bs\<^sub>1 @ bs\<^sub>2) r\<^sub>2"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1214 |      @{text "bsimpSEQ bs r\<^sub>1 r\<^sub>2"} & $\dn$ &  @{text "SEQ bs r\<^sub>1 r\<^sub>2"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1215 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1216 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1217 |      \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1218 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1219 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1220 | With this in place we can define our simplification function as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1221 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1222 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1223 |      \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1224 |      @{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1225 |          @{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1226 |      @{thm (lhs) bsimp.simps(2)[of "bs" _]} & $\dn$ & @{text "bsimpALTs bs (distinctWith (flts (map bsimp rs)) \<approx> \<emptyset>)"}\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1227 |      @{text "bsimp r"} & $\dn$ & @{text r}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1228 |      \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1229 |      \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1230 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1231 | \noindent | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1232 |      We believe our recursive function @{term bsimp} simplifies bitcoded regular
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1233 | expressions as intended by Sulzmann and Lu. There is no point in applying the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1234 |      @{text bsimp} function repeatedly (like the simplification in their paper which needs to be
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1235 |      applied until a fixpoint is reached) because we can show that @{term bsimp} is idempotent,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1236 | that is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1237 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1238 |      \begin{proposition}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1239 |      @{term "bsimp (bsimp r) = bsimp r"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1240 |      \end{proposition}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1241 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1242 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1243 |      This can be proved by induction on @{text r} but requires a detailed analysis
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1244 | that the de-nesting of alternatives always results in a flat list of regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1245 | expressions. We omit the details since it does not concern the correctness proof. | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1246 |      %It might be interesting to not that we do not simplify inside @{term STAR} and
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1247 |      %@{text NT}: the reason is that we want to keep the
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1248 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1249 | Next we can include simplification after each derivative step leading to the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1250 | following notion of bitcoded derivatives: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1251 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1252 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1253 |       \begin{tabular}{cc}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1254 |       \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1255 |       @{thm (lhs) bders_simp.simps(1)} & $\dn$ & @{thm (rhs) bders_simp.simps(1)}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1256 |       \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1257 | & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1258 |       \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1259 |       @{thm (lhs) bders_simp.simps(2)} & $\dn$ & @{thm (rhs) bders_simp.simps(2)}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1260 |       \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1261 |       \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1262 |       \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1263 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1264 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1265 | and use it in the improved lexing algorithm defined as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1266 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1267 |      \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1268 | \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1269 |   $\textit{blexer}^+\;r\,s$ & $\dn$ &
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1270 |       $\textit{let}\;r_{der} = (r^\uparrow)\backslash_{bsimp}\, s\;\textit{in}$                
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1271 |       $\;\;\textit{if}\; \textit{bnullable}(r_{der}) \;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1272 |        \;\;\textit{else}\;\textit{None}$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1273 | \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1274 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1275 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1276 |        \noindent The remaining task is to show that @{term blexer} and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1277 |        @{term "blexer_simp"} generate the same answers.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1278 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1279 | When we first | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1280 | attempted this proof we encountered a problem with the idea | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1281 | in Sulzmann and Lu's paper where the argument seems to be to appeal | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1282 |        again to the @{text retrieve}-function defined for the unsimplified version
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1283 | of the algorithm. But | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1284 | this does not work, because desirable properties such as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1285 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1286 | \[ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1287 |      @{text "retrieve r v = retrieve (bsimp r) v"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1288 | \] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1289 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1290 | \noindent do not hold under simplification---this property | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1291 | essentially purports that we can retrieve the same value from a | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1292 |      simplified version of the regular expression. To start with @{text retrieve}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1293 |      depends on the fact that the value @{text v} corresponds to the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1294 |      structure of the regular expression @{text r}---but the whole point of simplification
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1295 | is to ``destroy'' this structure by making the regular expression simpler. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1296 |      To see this consider the regular expression @{term "r = ALT r' ZERO"} and a corresponding
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1297 |      value @{text "v = Left v'"}. If we annotate bitcodes to @{text "r"}, then 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1298 |      we can use @{text retrieve} with @{text r} and @{text v} in order to extract a corresponding
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1299 |      bitsequence. The reason that this works is that @{text r} is an alternative
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1300 |      regular expression and @{text v} a corresponding @{text "Left"}-value. However, if we simplify
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1301 |      @{text r}, then @{text v} does not correspond to the shape of the regular 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1302 | expression anymore. So unless one can somehow | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1303 | synchronise the change in the simplified regular expressions with | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1304 |      the original POSIX value, there is no hope of appealing to @{text retrieve} in the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1305 |      correctness argument for @{term blexer_simp}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1306 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1307 | We found it more helpful to introduce the rewriting systems shown in | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1308 |      Figure~\ref{SimpRewrites}. The idea is to generate 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1309 |      simplified regular expressions in small steps (unlike the @{text bsimp}-function which
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1310 | does the same in a big step), and show that each of | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1311 | the small steps preserves the bitcodes that lead to the final POSIX value. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1312 | The rewrite system is organised such that $\leadsto$ is for bitcoded regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1313 |      expressions and $\stackrel{s}{\leadsto}$ for lists of bitcoded regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1314 | expressions. The former essentially implements the simplifications of | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1315 |      @{text "bsimpSEQ"} and @{text flts}; while the latter implements the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1316 |      simplifications in @{text "bsimpALTs"}. We can show that any bitcoded
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1317 | regular expression reduces in zero or more steps to the simplified | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1318 |      regular expression generated by @{text bsimp}:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1319 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1320 |      \begin{lemma}\label{lemone}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1321 |      @{thm[mode=IfThen] rewrites_to_bsimp}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1322 |      \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1323 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1324 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1325 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1326 | \noindent | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1327 |      We can also show that this rewrite system preserves @{term bnullable}, that 
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1328 | is simplification does not affect nullability: | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1329 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1330 |      \begin{lemma}\label{lembnull}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1331 |      @{thm[mode=IfThen] bnullable0(1)[of "r\<^sub>1" "r\<^sub>2"]}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1332 |      \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1333 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1334 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1335 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1336 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1337 |      From this, we can show that @{text bmkeps} will produce the same bitsequence
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1338 | as long as one of the bitcoded regular expressions in $\leadsto$ is nullable (this lemma | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1339 |      establishes the missing fact we were not able to establish using @{text retrieve}, as suggested
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1340 | in the paper by Sulzmannn and Lu). | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1341 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1342 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1343 |      \begin{lemma}\label{lemthree}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1344 |      @{thm[mode=IfThen] rewrite_bmkeps_aux(1)[of "r\<^sub>1" "r\<^sub>2"]}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1345 |      \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1346 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1347 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1348 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1349 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1350 | Crucial is also the fact that derivative steps and simplification steps can be interleaved, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1351 | which is shown by the fact that $\leadsto$ is preserved under derivatives. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1352 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1353 |      \begin{lemma}\label{bderlem}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1354 |      @{thm[mode=IfThen] rewrite_preserves_bder(1)[of "r\<^sub>1" "r\<^sub>2"]}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1355 |      \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1356 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1357 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1358 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1359 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1360 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1361 |      Using this fact together with Lemma~\ref{lemone} allows us to prove the central lemma
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1362 | that the unsimplified | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1363 |      derivative (with a string @{term s}) reduces to the simplified derivative (with the same string).
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1364 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1365 |      \begin{lemma}\label{lemtwo}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1366 |      @{thm[mode=IfThen] central}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1367 |      \end{lemma}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1368 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1369 |      %\begin{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1370 |      %By reverse induction on @{term s} generalising over @{text r}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1371 |      %\end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1372 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1373 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1374 |      With these lemmas in place we can finally establish that @{term "blexer_simp"} and @{term "blexer"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1375 |      generate the same value, and using Theorem~\ref{thmone} from the previous section that this value
 | 
| 599 | 1376 |      is indeed the POSIX value as generated by \textit{lexer}.
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1377 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1378 |      \begin{theorem}
 | 
| 599 | 1379 |      @{thm[mode=IfThen] main_blexer_simp[symmetric]} \; (@{text "= lexer r s"}\; by Thm.~\ref{thmone})
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1380 |      \end{theorem}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1381 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1382 |      %\begin{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1383 |      %By unfolding the definitions and using Lemmas~\ref{lemtwo} and \ref{lemthree}. 	
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1384 |      %\end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1385 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1386 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1387 |      This means that if the algorithm is called with a regular expression @{term r} and a string
 | 
| 498 
ab626b60ee64
fixed tiny typo in the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
496diff
changeset | 1388 |      @{term s} with $@{term s} \in L(@{term r})$, it will return @{term "Some v"} for the unique
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1389 |      @{term v} we defined by the POSIX relation $(@{term s}, @{term r}) \rightarrow @{term v}$; otherwise
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1390 |      the algorithm returns @{term "None"} when $s \not\in L(r)$ and no such @{text v} exists.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1391 | This completes the correctness proof for the second POSIX lexing algorithm by Sulzmann and Lu. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1392 | The interesting point of this algorithm is that the sizes of derivatives do not grow arbitrarily big but | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1393 | can be finitely bounded, which | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1394 | we shall show next. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1395 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1396 |    \begin{figure}[t]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1397 |   \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1398 |   \begin{tabular}{@ {\hspace{-8mm}}c@ {}}
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1399 | \\[-7mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1400 |   @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}$S\ZERO{}_l$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1401 |   @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}$S\ZERO{}_r$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1402 |   @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}$S\ONE$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1403 |   @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}SL\qquad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1404 |   @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}SR\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1405 |   @{thm[mode=Axiom] bs6}$A0$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1406 |   @{thm[mode=Axiom] bs7}$A1$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1407 |   @{thm[mode=Rule] bs10[of "rs\<^sub>1" "rs\<^sub>2"]}$AL$\\
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1408 |   @{thm[mode=Rule] rrewrite_srewrite.ss2[of "rs\<^sub>1" "rs\<^sub>2"]}$LT$\quad
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1409 |   @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}$LH$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1410 |   @{thm[mode=Axiom] ss4}$L\ZERO$\quad
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1411 |   @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}$LS$\medskip\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1412 |   @{thm[mode=Rule] ss6[of "r\<^sub>2" "r\<^sub>1" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}$LD$\\[-5mm]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1413 |   \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1414 |   \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1415 |   \caption{The rewrite rules that generate simplified regular expressions
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1416 |   in small steps: @{term "rrewrite r\<^sub>1 r\<^sub>2"} is for bitcoded regular
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1417 |   expressions and @{term "srewrite rs\<^sub>1 rs\<^sub>2"} for \emph{lists} of bitcoded
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1418 | expressions. Interesting is the $LD$ rule that allows copies of regular | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1419 | expressions to be removed provided a regular expression earlier in the list can | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1420 |   match the same strings.}\label{SimpRewrites}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1421 |   \end{figure}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1422 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1423 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1424 | section {* Finiteness of Derivatives *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1425 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1426 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1427 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1428 | In this section let us sketch our argument for why the size of the simplified | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1429 | derivatives with the aggressive simplification function can be finitely bounded. Suppose | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1430 | we have a size function for bitcoded regular expressions, written | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1431 | $\llbracket r\rrbracket$, which counts the number of nodes if we regard $r$ as a tree | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1432 | (we omit the precise definition; ditto for lists $\llbracket r\!s\rrbracket$). For this we show that for every $r$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1433 | there exists a bound $N$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1434 | such that | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1435 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1436 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1437 | $\forall s. \; \llbracket@{term "bders_simp r s"}\rrbracket \leq N$
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1438 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1439 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1440 | \noindent | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1441 | Note that the bound $N$ is a bound for \emph{all} strings, no matter how long they are.
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1442 | We establish this bound by induction on $r$. The base cases for @{term AZERO},
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1443 | @{term "AONE bs"} and @{term "ACHAR bs c"} are straightforward. The interesting case is
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1444 | for sequences of the form @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}. In this case our induction
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1445 | hypotheses state $\exists N_1. \forall s. \; \llbracket{}@{term "bders_simp r\<^sub>1 s"}\rrbracket \leq N_1$ and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1446 | $\exists N_2. \forall s. \; \llbracket@{term "bders_simp r\<^sub>2 s"}\rrbracket \leq N_2$. We can reason as follows
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1447 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1448 | \begin{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1449 | \begin{tabular}{lcll}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1450 | & & $ \llbracket@{term "bders_simp (ASEQ bs r\<^sub>1 r\<^sub>2) s"}\rrbracket$\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1451 | & $ = $ & $\llbracket bsimp\,(\textit{ALTs}\;bs\;((@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}) ::
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1452 |     [@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suffix}(@{text r}_1, s)]))\rrbracket $ & (1) \\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1453 | & $\leq$ & | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1454 |     $\llbracket\textit{distinctWith}\,((@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}) ::
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1455 |     [@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suffix}(@{text r}_1, s)])\,\approx\;@{term "{}"}\rrbracket + 1 $ & (2) \\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1456 | & $\leq$ & $\llbracket@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}\rrbracket +
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1457 |              \llbracket\textit{distinctWith}\,[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suffix}(@{text r}_1, s)]\,\approx\;@{term "{}"}\rrbracket + 1 $ & (3) \\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1458 | & $\leq$ & $N_1 + \llbracket r_2\rrbracket + 2 + | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1459 |       \llbracket\textit{distinctWith}\,[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suffix}(@{text r}_1, s)]\,\approx\;@{term "{}"}\rrbracket$ & (4)\\
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1460 | & $\leq$ & $N_1 + \llbracket r_2\rrbracket + 2 + l_{N_{2}} * N_{2}$ & (5)
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1461 | \end{tabular}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1462 | \end{center}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1463 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1464 | % tell Chengsong about Indian paper of closed forms of derivatives | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1465 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1466 | \noindent | 
| 502 
1ab693d6342f
fixed a small typo in the Papaer.thy
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
499diff
changeset | 1467 | where in (1) the $\textit{Suffix}(@{text "r"}_1, s)$ are all the suffixes of $s$ where @{term "bders_simp r\<^sub>1 s'"} is nullable ($s'$ being a suffix of $s$).
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1468 | In (3) we know that  $\llbracket@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}\rrbracket$ is 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1469 | bounded by $N_1 + \llbracket{}r_2\rrbracket + 1$. In (5) we know the list comprehension contains only regular expressions of size smaller
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1470 | than $N_2$. The list length after @{text distinctWith} is bounded by a number, which we call $l_{N_2}$. It stands
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1471 | for the number of distinct regular expressions smaller than $N_2$ (there can only be finitely many of them). | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1472 | We reason similarly for @{text STAR}.\smallskip
 | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1473 | |
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1474 | |
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1475 | Clearly we give in this finiteness argument (Step (5)) a very loose bound that is | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1476 | far from the actual bound we can expect. We can do better than this, but this does not improve | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1477 | the finiteness property we are proving. If we are interested in a polynomial bound, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1478 | one would hope to obtain a similar tight bound as for partial | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1479 | derivatives introduced by Antimirov \cite{Antimirov95}. After all the idea with
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1480 | @{text distinctWith} is to maintain a ``set'' of alternatives (like the sets in
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1481 | partial derivatives). Unfortunately to obtain the exact same bound would mean | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1482 | we need to introduce simplifications, such as | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1483 | $(r_1 + r_2) \cdot r_3 \longrightarrow (r_1 \cdot r_3) + (r_2 \cdot r_3)$, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1484 | which exist for partial derivatives. However, if we introduce them in our | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1485 | setting we would lose the POSIX property of our calculated values. For example | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1486 | given the regular expressions @{term "SEQ (ALT a ab) (ALT b ONE)"} and the string $[a, b]$, then our
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1487 | algorithm generates the following correct POSIX value | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1488 | % | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1489 | \[ | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1490 | @{term "Seq (Right (Seq (Char a) (Char b))) (Right Empty)"}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1491 | \] | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1492 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1493 | \noindent | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1494 | Essentially it matches the string with the longer @{text "Right"}-alternative in the
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1495 | first sequence (and then the `rest' with the empty regular expression @{const ONE} from the second sequence). 
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1496 | If we add the simplification above, then we obtain the following value | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1497 | % | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1498 | \[ | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1499 | @{term "Seq (Left (Char a)) (Left (Char b))"}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1500 | \] | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1501 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1502 | \noindent | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1503 | where the @{text "Left"}-alternatives get priority. However, this violates
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1504 | the POSIX rules and we have not been able to | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1505 | reconcile this problem. Therefore we leave better bounds for future work.\\[-6.5mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1506 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1507 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1508 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1509 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1510 | section {* Conclusion *}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1511 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1512 | text {*
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1513 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1514 | We set out in this work to prove in Isabelle/HOL the correctness of | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1515 | the second POSIX lexing algorithm by Sulzmann and Lu | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1516 |    \cite{Sulzmann2014}. This follows earlier work where we established
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1517 | the correctness of the first algorithm | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1518 |    \cite{AusafDyckhoffUrban2016}. In the earlier work we needed to
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1519 | introduce our own specification about what POSIX values are, | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1520 | because the informal definition given by Sulzmann and Lu did not | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1521 | stand up to a formal proof. Also for the second algorithm we needed | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1522 | to introduce our own definitions and proof ideas in order to establish the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1523 | correctness. Our interest in the second algorithm | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1524 | lies in the fact that by using bitcoded regular expressions and an aggressive | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1525 | simplification method there is a chance that the derivatives | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1526 | can be kept universally small (we established in this paper that | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1527 | they can be kept finitely bounded for any string). | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1528 | %This is important if one is after | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1529 | %an efficient POSIX lexing algorithm based on derivatives. | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1530 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1531 | Having proved the correctness of the POSIX lexing algorithm, which | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1532 | lessons have we learned? Well, we feel this is a very good example | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1533 | where formal proofs give further insight into the matter at | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1534 |    hand. For example it is very hard to see a problem with @{text nub}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1535 |    vs @{text distinctWith} with only experimental data---one would still
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1536 | see the correct result but find that simplification does not simplify in well-chosen, but not | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1537 | obscure, examples. | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1538 | %We found that from an implementation | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1539 | %point-of-view it is really important to have the formal proofs of | 
| 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1540 | %the corresponding properties at hand. | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1541 | |
| 599 | 1542 | With the results reported here, we can of course only make a claim about the correctness | 
| 1543 | of the algorithm and the sizes of the | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1544 | derivatives, not about the efficiency or runtime of our version of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1545 | Sulzman and Lu's algorithm. But we found the size is an important | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1546 | first indicator about efficiency: clearly if the derivatives can | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1547 | grow to arbitrarily big sizes and the algorithm needs to traverse | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1548 | the derivatives possibly several times, then the algorithm will be | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1549 | slow---excruciatingly slow that is. Other works seems to make | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1550 | stronger claims, but during our work we have developed a healthy | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1551 | suspicion when for example experimental data is used to back up | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1552 | efficiency claims. For example Sulzmann and Lu write about their | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1553 |    equivalent of @{term blexer_simp} \textit{``...we can incrementally
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1554 | compute bitcoded parse trees in linear time in the size of the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1555 |    input''} \cite[Page 14]{Sulzmann2014}.  Given the growth of the
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1556 | derivatives in some cases even after aggressive simplification, | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1557 | this is a hard to believe claim. A similar claim about a | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1558 |    theoretical runtime of @{text "O(n\<^sup>2)"} is made for the
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1559 | Verbatim lexer, which calculates tokens according to POSIX | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1560 |    rules~\cite{verbatim}. For this, Verbatim uses Brzozowski's
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1561 |    derivatives like in our work.  The authors write: \textit{``The
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1562 | results of our empirical tests [..] confirm that Verbatim has | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1563 |    @{text "O(n\<^sup>2)"} time complexity.''}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1564 |    \cite[Section~VII]{verbatim}.  While their correctness proof for
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1565 | Verbatim is formalised in Coq, the claim about the runtime | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1566 | complexity is only supported by some emperical evidence obtained by | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1567 | using the code extraction facilities of Coq. Given our observation | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1568 | with the ``growth problem'' of derivatives, we tried out their | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1569 |    extracted OCaml code with the example \mbox{@{text "(a +
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1570 | aa)\<^sup>*"}} as a single lexing rule, and it took for us around 5 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1571 | minutes to tokenise a string of 40 $a$'s and that increased to | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1572 | approximately 19 minutes when the string is 50 $a$'s long. Taking | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1573 | into account that derivatives are not simplified in the Verbatim | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1574 | lexer, such numbers are not surprising. Clearly our result of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1575 | having finite derivatives might sound rather weak in this context | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1576 | but we think such effeciency claims really require further | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1577 | scrutiny. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1578 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1579 | The contribution of this paper is to make sure derivatives do not | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1580 |    grow arbitrarily big (universially) In the example \mbox{@{text "(a
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1581 |    + aa)\<^sup>*"}}, \emph{all} derivatives have a size of 17 or
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1582 | less. The result is that lexing a string of, say, 50\,000 a's with | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1583 |    the regular expression \mbox{@{text "(a + aa)\<^sup>*"}} takes
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1584 | approximately 10 seconds with our Scala implementation of the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1585 | presented algorithm. | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1586 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1587 | Finally, let us come back to the point about bounded regular | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1588 |    expressions. We have in this paper only shown that @{term "NTIMES r
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1589 | n"} can be included, but all our results extend straightforwardly | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1590 | also to the other bounded regular expressions. We find bounded | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1591 | regular expressions fit naturally into the setting of Brzozowski | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1592 | derivatives and the bitcoded regular expressions by Sulzmann and Lu. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1593 | In contrast bounded regular expressions are often the Achilles' | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1594 | heel in regular expression matchers that use the traditional | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1595 | automata-based approach to lexing, primarily because they need to expand the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1596 | counters of bounded regular expressions into $n$-connected copies | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1597 | of an automaton. This is not needed in Sulzmann and Lu's algorithm. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1598 |    To see the difference consider for example the regular expression @{term "SEQ (NTIMES a
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1599 | 1001) (STAR a)"}, which is not permitted in the Go language because | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1600 | the counter is too big. In contrast we have no problem with | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1601 | matching this regular expression with, say 50\,000 a's, because the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1602 | counters can be kept compact. In fact, the overall size of the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1603 | derivatives is never greater than 5 in this example. Even in the | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1604 | example from Section 2, where Rust raises an error message, namely | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1605 |    \mbox{@{text "a\<^bsup>{1000}{100}{5}\<^esup>"}}, the maximum size for
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1606 | our derivatives is a moderate 14. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1607 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1608 | Let us also compare our work to the verified Verbatim++ lexer where | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1609 | the authors of the Verbatim lexer introduced a number of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1610 | improvements and optimisations, for example memoisation | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1611 |    \cite{verbatimpp}. However, unlike Verbatim, which works with
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1612 | derivatives like in our work, Verbatim++ compiles first a regular | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1613 | expression into a DFA. While this makes lexing fast in many cases, | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1614 | with examples of bounded regular expressions like | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1615 |    \mbox{@{text "a\<^bsup>{100}{5}\<^esup>"}}
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1616 | one needs to represent them as | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1617 | sequences of $a \cdot a \cdot \ldots \cdot a$ (500 a's in sequence). We have run | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1618 | their extracted code with such a regular expression as a | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1619 | single lexing rule and a string of 50\,000 a's---lexing in this case | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1620 | takes approximately 5~minutes. We are not aware of any better | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1621 | translation using the traditional notion of DFAs. Therefore we | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1622 | prefer to stick with calculating derivatives, but attempt to make | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1623 | this calculation (in the future) as fast as possible. What we can guaranty | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1624 | with the presented work is that the maximum size of the derivatives | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1625 | for this example is not bigger than 9. This means our Scala | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1626 | implementation only needs a few seconds for this example. | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1627 | % | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1628 | % | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1629 | %Possible ideas are | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1630 | %zippers which have been used in the context of parsing of | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1631 |    %context-free grammars \cite{zipperparser}.
 | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1632 | \medskip | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1633 | |
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1634 | \noindent | 
| 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1635 |    Our Isabelle code including the results from Sec.~5 is available from \url{https://github.com/urbanchr/posix}.
 | 
| 499 
6a100d32314c
updated the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
498diff
changeset | 1636 | %\\[-10mm] | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1637 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1638 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1639 | %%\bibliographystyle{plain}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1640 | \bibliography{root}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1641 | |
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1642 | \newpage | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1643 | \appendix | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1644 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1645 | \section{Some Proofs}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1646 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1647 | While we have formalised \emph{all} results in Isabelle/HOL, below we shall give some
 | 
| 569 
5af61c89f51e
updated paper and corresponding theories
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
563diff
changeset | 1648 | rough details of our reasoning in ``English'' if this is helpful for reviewing. | 
| 496 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1649 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1650 | \begin{proof}[Proof of Lemma~\ref{codedecode}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1651 | This follows from the property that | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1652 |   $\textit{decode}'\,((\textit{code}\,v) \,@\, bs)\,r = (v, bs)$ holds
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1653 | for any bit-sequence $bs$ and $\vdash v : r$. This property can be | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1654 | easily proved by induction on $\vdash v : r$. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1655 | \end{proof}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1656 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1657 | \begin{proof}[Proof of Lemma~\ref{mainlemma}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1658 | This can be proved by induction on $s$ and generalising over | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1659 | $v$. The interesting point is that we need to prove this in the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1660 | reverse direction for $s$. This means instead of cases $[]$ and | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1661 | $c\!::\!s$, we have cases $[]$ and $s\,@\,[c]$ where we unravel the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1662 |   string from the back.\footnote{Isabelle/HOL provides an induction principle
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1663 | for this way of performing the induction.} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1664 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1665 |   The case for $[]$ is routine using Lemmas~\ref{codedecode}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1666 |   and~\ref{retrievecode}. In the case $s\,@\,[c]$, we can infer from
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1667 | the assumption that $\vdash v : (r\backslash s)\backslash c$ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1668 |   holds. Hence by Prop.~\ref{Posix2} we know that 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1669 | (*) $\vdash \inj\,(r\backslash s)\,c\,v : r\backslash s$ holds too. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1670 |   By definition of $\textit{flex}$ we can unfold the left-hand side
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1671 | to be | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1672 | \[ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1673 |     \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,(s\,@\,[c])\,v) =
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1674 |     \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,s\,(\inj\,(r\backslash s)\,c\,v))  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1675 | \] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1676 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1677 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1678 | By IH and (*) we can rewrite the right-hand side to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1679 |   $\textit{decode}\,(\textit{retrieve}\,(r^\uparrow\backslash s)\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1680 | (\inj\,(r\backslash s)\,c\,\,v))\,r$ which is equal to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1681 |   $\textit{decode}\,(\textit{retrieve}\, (r^\uparrow\backslash
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1682 | (s\,@\,[c]))\,v)\,r$ as required. The last rewrite step is possible | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1683 | because we generalised over $v$ in our induction. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1684 | \end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1685 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1686 | \begin{proof}[Proof of Theorem~\ref{thmone}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1687 |   We can first expand both sides using Lem.~\ref{flex} and the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1688 |   definition of \textit{blexer}. This gives us two
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1689 |   \textit{if}-statements, which we need to show to be equal. By 
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1690 |   Lemma~\ref{bnullable}\textit{(2)} we know the \textit{if}-tests coincide:
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1691 |   $\textit{bnullable}(r^\uparrow\backslash s) \;\textit{iff}\;
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1692 | \nullable(r\backslash s)$. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1693 |   For the \textit{if}-branch suppose $r_d \dn r^\uparrow\backslash s$ and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1694 |   $d \dn r\backslash s$. We have (*) @{text "nullable d"}. We can then show
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1695 |   by Lemma~\ref{bnullable}\textit{(3)} that
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1696 | % | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1697 | \[ | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1698 |     \textit{decode}(\textit{bmkeps}\:r_d)\,r =
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1699 |     \textit{decode}(\textit{retrieve}\,r_d\,(\textit{mkeps}\,d))\,r
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1700 | \] | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1701 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1702 | \noindent | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1703 | where the right-hand side is equal to | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1704 |   $\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,(\textit{mkeps}\,
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1705 |   d))$ by Lemma~\ref{mainlemma} (we know
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1706 |   $\vdash \textit{mkeps}\,d : d$ by (*)).  This shows the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1707 |   \textit{if}-branches return the same value. In the
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1708 |   \textit{else}-branches both \textit{lexer} and \textit{blexer} return
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1709 |   \textit{None}. Therefore we can conclude the proof.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1710 | \end{proof}  
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1711 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1712 | \begin{proof}[Proof of Lemma~\ref{lemone}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1713 |      By induction on @{text r}. For this we can use the properties
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1714 |      @{thm fltsfrewrites} and @{text "rs"}$\;\stackrel{s}{\leadsto}^*$@{text "distinctWith rs \<approx>"} @{term "{}"}. The latter uses
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1715 | repeated applications of the $LD$ rule which allows the removal | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1716 | of duplicates that can recognise the same strings. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1717 |      \end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1718 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1719 |     \begin{proof}[Proof of Lemma~\ref{lembnull}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1720 |      Straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1721 | The only interesting case is the rule $LD$ where the property holds since by the side-conditions of that rule the empty string will | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1722 |      be in both @{text "L (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ [r\<^sub>2] @ rs\<^sub>c)"} and
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1723 |      @{text "L (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ rs\<^sub>c)"}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1724 |      \end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1725 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1726 |  \begin{proof}[Proof of Lemma \ref{lemthree}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1727 |      By straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1728 | Again the only interesting case is the rule $LD$ where we need to ensure that | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1729 |      @{text "bmkeps (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ [r\<^sub>2] @ rs\<^sub>c) =
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1730 | bmkeps (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ rs\<^sub>c)"} holds. | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1731 | This is indeed the case because according to the POSIX rules the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1732 | generated bitsequence is determined by the first alternative that can match the | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1733 | string (in this case being nullable). | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1734 |      \end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1735 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1736 |      \begin{proof}[Proof of Lemma~\ref{bderlem}]
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1737 |      By straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1738 |      The case for $LD$ holds because @{term "L (erase (bder c r\<^sub>2)) \<subseteq> L (erase (bder c r\<^sub>1))"}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1739 |      if and only if @{term "L (erase (r\<^sub>2)) \<subseteq> L (erase (r\<^sub>1))"}.
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1740 |      \end{proof}
 | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1741 | *} | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1742 | |
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1743 | (*<*) | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1744 | end | 
| 
f493a20feeb3
updated to include the paper
 Christian Urban <christian.urban@kcl.ac.uk> parents: diff
changeset | 1745 | (*>*) |