%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/

%% Created for CS TAN at 2022-11-20 17:26:32 +0000
@inproceedings{LEX, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
6 |
title={Lex—a lexical analyzer generator}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
7 |
author={Michael E. Lesk and Eric E. Schmidt}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
8 |
year={1990}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
9 |
url={https://api.semanticscholar.org/CorpusID:7900881} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
10 |
} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
11 |
@article{RegularExpressions, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
12 |
title={REPRESENTATION OF EVENTS IN NERVE NETS AND FINITE AUTOMATA$^1$}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
13 |
author={Kleene, SC}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
14 |
journal={Automata Studies: Annals of Mathematics Studies. Number 34}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
15 |
number={34}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
16 |
pages={3}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
17 |
year={1956}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
18 |
publisher={Princeton University Press} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
19 |
} |
%% Saved with string encoding Unicode (UTF-8)
@inproceedings{Doczkal2013, |
627 | 22 |
author = {C.~Doczkaland J.O.~Kaiser and G.~Smolka}, |
626 | 23 |
title = {A Constructive Theory of Regular Languages in Coq}, |
24 |
year = {2013}, |
|
25 |
isbn = {9783319035444}, |
|
26 |
publisher = {Springer-Verlag}, |
|
27 |
address = {Berlin, Heidelberg}, |
|
28 |
url = {https://doi.org/10.1007/978-3-319-03545-1_6}, |
|
29 |
doi = {10.1007/978-3-319-03545-1_6}, |
|
30 |
abstract = {We present a formal constructive theory of regular languages consisting of about 1400 lines of Coq/Ssreflect. As representations we consider regular expressions, deterministic and nondeterministic automata, and Myhill and Nerode partitions. We construct computable functions translating between these representations and show that equivalence of representations is decidable. We also establish the usual closure properties, give a minimization algorithm for DFAs, and prove that minimal DFAs are unique up to state renaming. Our development profits much from Ssreflect's support for finite types and graphs.}, |
|
31 |
booktitle = {Proceedings of the Third International Conference on Certified Programs and Proofs - Volume 8307}, |
|
32 |
pages = {82–97}, |
|
33 |
numpages = {16}, |
|
34 |
keywords = {finite automata, regular expressions, Myhill-Nerode, Ssreflect, Coq, regular languages} |
|
609 | 35 |
} |
36 |
||
627 | 37 |
@inproceedings{Lammich2012, |
38 |
author = {P.~Lammichand T.~Tuerk}, |
|
39 |
year = {2012}, |
|
40 |
month = {08}, |
|
41 |
pages = {166-182}, |
|
42 |
title = {Applying Data Refinement for Monadic Programs to Hopcroft's Algorithm}, |
|
43 |
isbn = {978-3-642-32346-1}, |
|
44 |
doi = {10.1007/978-3-642-32347-8_12} |
|
45 |
} |
|
626 | 46 |
|
47 |
@article{Krauss2012, |
|
627 | 48 |
author={A.~Krauss and T.~Nipkow}, |
626 | 49 |
title={Proof Pearl: Regular Expression Equivalence and Relation Algebra}, |
50 |
journal={J. Automated Reasoning},volume=49,pages={95--106},year=2012,note={published online March 2011}} |
|
51 |
||
52 |
||
53 |
||
54 |
@article{kleene1956, |
|
55 |
title={Representation of events in nerve nets and finite automata}, |
|
56 |
author={S.C.~Kleene and others}, |
|
57 |
journal={Automata studies}, |
|
58 |
volume={34}, |
|
59 |
pages={3--41}, |
|
60 |
year={1956}, |
|
61 |
publisher={Princeton, NJ} |
|
62 |
} |
|
63 |
||
628 | 64 |
@inproceedings{Paulson2015, |
65 |
author = {L.~C.~Paulson}, |
|
66 |
booktitle = {Proc.~of the 25th International Conference on Automated Deduction (CADE)}, |
|
67 |
pages = {231--245}, |
|
68 |
series = {LNAI}, |
|
69 |
title = {{A} {F}ormalisation of {F}inite {A}utomata {U}sing {H}ereditarily {F}inite {S}ets}, |
|
70 |
volume = {9195}, |
|
71 |
year = {2015}} |
|
609 | 72 |
|
73 |
||
626 | 74 |
@article{Murugesan2014, |
75 |
author = {N.~Murugesan and O.~V.~Shanmuga Sundaram}, |
|
76 |
journal = {International Journal of Computer Trends and Technology}, |
|
77 |
number = {1}, |
|
78 |
pages = {29--33}, |
|
79 |
title = {{S}ome {P}roperties of {B}rzozowski {D}erivatives of {R}egular {E}xpressions}, |
|
80 |
url = {http://arxiv.org/abs/1407.5902}, |
|
81 |
volume = {13}, |
|
82 |
year = {2014}, |
|
83 |
bdsk-url-1 = {http://arxiv.org/abs/1407.5902}} |
|
84 |
||
85 |
@phdthesis{Ausaf, |
|
86 |
author = {F.~Ausaf}, |
|
87 |
school = {King's College London}, |
|
88 |
title = {{V}erified {L}exing and {P}arsing}, |
|
89 |
year = {2018}} |
|
90 |
||
91 |
@inproceedings{Okui10, |
|
92 |
abstract = {This paper offers a new efficient regular expression matching algorithm which follows the POSIX-type leftmost-longest rule. The algorithm basically emulates the subset construction without backtracking, so that its computational cost even in the worst case does not explode exponentially; the time complexity of the algorithm is O(mn(n{\thinspace}+{\thinspace}c)), where m is the length of a given input string, n the number of occurrences of the most frequently used letter in a given regular expression and c the number of subexpressions to be used for capturing substrings. A formalization of the leftmost-longest semantics by using parse trees is also discussed.}, |
|
93 |
address = {Berlin, Heidelberg}, |
|
94 |
author = {S.~Okui and T.~Suzuki}, |
|
95 |
booktitle = {Implementation and Application of Automata}, |
|
96 |
editor = {Domaratzki, Michael and Salomaa, Kai}, |
|
97 |
isbn = {978-3-642-18098-9}, |
|
98 |
pages = {231--240}, |
|
99 |
publisher = {Springer Berlin Heidelberg}, |
|
100 |
title = {Disambiguation in Regular Expression Matching via Position Automata with Augmented Transitions}, |
|
101 |
year = {2011}} |
|
609 | 102 |
|
626 | 103 |
@article{Berglund14, |
104 |
author = {M.~Berglund, F.~Drewes and B.~Van Der Merwe}, |
|
105 |
doi = {10.4204/EPTCS.151.7}, |
|
106 |
journal = {Electronic Proceedings in Theoretical Computer Science}, |
|
107 |
month = {05}, |
|
108 |
title = {Analyzing Catastrophic Backtracking Behavior in Practical Regular Expression Matching}, |
|
109 |
volume = {151}, |
|
110 |
year = {2014}, |
|
111 |
bdsk-url-1 = {https://doi.org/10.4204/EPTCS.151.7}} |
|
112 |
||
113 |
@inproceedings{Berglund18, |
|
114 |
abstract = {Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.}, |
|
115 |
address = {Cham}, |
|
628 | 116 |
author = {M.~Berglund and W.~Bester and B.~Van Der Merwe}, |
626 | 117 |
booktitle = {Theoretical Aspects of Computing -- ICTAC 2018}, |
118 |
editor = {Fischer, Bernd and Uustalu, Tarmo}, |
|
119 |
isbn = {978-3-030-02508-3}, |
|
120 |
pages = {99--115}, |
|
121 |
publisher = {Springer International Publishing}, |
|
122 |
title = {Formalising Boost POSIX Regular Expression Matching}, |
|
123 |
year = {2018}} |
|
124 |
||
125 |
@inproceedings{Chen12, |
|
627 | 126 |
author = {H.~Chenand S.~Yu}, |
626 | 127 |
doi = {10.1007/978-3-642-27654-5_27}, |
128 |
month = {01}, |
|
129 |
pages = {343-356}, |
|
130 |
title = {Derivatives of Regular Expressions and an Application}, |
|
131 |
volume = {7160}, |
|
132 |
year = {2012}, |
|
133 |
bdsk-url-1 = {https://doi.org/10.1007/978-3-642-27654-5_27}} |
|
134 |
||
609 | 135 |
@article{Takayuki2019, |
627 | 136 |
author = {T.~Miyazaki and Y.~Minamide}, |
626 | 137 |
doi = {10.2197/ipsjjip.27.422}, |
138 |
journal = {Journal of Information Processing}, |
|
139 |
pages = {422-430}, |
|
140 |
title = {Derivatives of Regular Expressions with Lookahead}, |
|
141 |
volume = {27}, |
|
142 |
year = {2019}, |
|
143 |
bdsk-url-1 = {https://doi.org/10.2197/ipsjjip.27.422}} |
|
608 | 144 |
|
624 | 145 |
@article{FERNAU2015287, |
626 | 146 |
abstract = {A pattern α, i.e., a string that contains variables and terminals, matches a terminal word w if w can be obtained by uniformly substituting the variables of α by terminal words. Deciding whether a given terminal word matches a given pattern is NP-complete and this holds for several natural variants of the problem that result from whether or not variables can be erased, whether or not the patterns are required to be terminal-free or whether or not the mapping of variables to terminal words must be injective. We consider numerous parameters of this problem (i.e., number of variables, length of w, length of the words substituted for variables, number of occurrences per variable, cardinality of the terminal alphabet) and for all possible combinations of the parameters (and variants described above), we answer the question whether or not the problem is still NP-complete if these parameters are bounded by constants.}, |
147 |
author = {H.~Fernau and M.L.~Schmid}, |
|
148 |
doi = {https://doi.org/10.1016/j.ic.2015.03.006}, |
|
149 |
issn = {0890-5401}, |
|
150 |
journal = {Information and Computation}, |
|
151 |
keywords = {Parameterised pattern matching, Function matching, NP-completeness, Membership problem for pattern languages, Morphisms}, |
|
152 |
pages = {287-305}, |
|
153 |
title = {Pattern matching with variables: A multivariate complexity analysis}, |
|
154 |
url = {https://www.sciencedirect.com/science/article/pii/S0890540115000218}, |
|
155 |
volume = {242}, |
|
156 |
year = {2015}, |
|
157 |
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0890540115000218}, |
|
158 |
bdsk-url-2 = {https://doi.org/10.1016/j.ic.2015.03.006}} |
|
624 | 159 |
|
160 |
@inproceedings{Schmid2012, |
|
626 | 161 |
abstract = {We study different possibilities of combining the concept of homomorphic replacement with regular expressions in order to investigate the class of languages given by extended regular expressions with backreferences (REGEX). It is shown in which regard existing and natural ways to do this fail to reach the expressive power of REGEX. Furthermore, the complexity of the membership problem for REGEX with a bounded number of backreferences is considered.}, |
162 |
address = {Berlin, Heidelberg}, |
|
163 |
author = {M.L.~Schmid}, |
|
164 |
booktitle = {Proceedings of the 16th International Conference on Developments in Language Theory}, |
|
165 |
doi = {10.1007/978-3-642-31653-1_8}, |
|
166 |
isbn = {9783642316524}, |
|
167 |
keywords = {extended regular expressions, pattern languages, REGEX, pattern expressions, homomorphic replacement}, |
|
168 |
location = {Taipei, Taiwan}, |
|
169 |
numpages = {12}, |
|
170 |
pages = {73--84}, |
|
171 |
publisher = {Springer-Verlag}, |
|
172 |
series = {DLT'12}, |
|
173 |
title = {Inside the Class of REGEX Languages}, |
|
174 |
url = {https://doi.org/10.1007/978-3-642-31653-1_8}, |
|
175 |
year = {2012}, |
|
176 |
bdsk-url-1 = {https://doi.org/10.1007/978-3-642-31653-1_8}} |
|
624 | 177 |
|
178 |
@article{BERGLUND2022, |
|
626 | 179 |
abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.}, |
628 | 180 |
author = {M.~Berglund and B.~Van Der Merwe}, |
626 | 181 |
doi = {https://doi.org/10.1016/j.tcs.2022.10.041}, |
182 |
issn = {0304-3975}, |
|
183 |
journal = {Theoretical Computer Science}, |
|
184 |
keywords = {Regular expressions, Backreferences}, |
|
185 |
title = {Re-examining regular expressions with backreferences}, |
|
186 |
url = {https://www.sciencedirect.com/science/article/pii/S0304397522006570}, |
|
187 |
year = {2022}, |
|
188 |
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397522006570}, |
|
189 |
bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2022.10.041}} |
|
608 | 190 |
|
624 | 191 |
@article{FREYDENBERGER20191, |
626 | 192 |
abstract = {Most modern libraries for regular expression matching allow back-references (i.e., repetition operators) that substantially increase expressive power, but also lead to intractability. In order to find a better balance between expressiveness and tractability, we combine these with the notion of determinism for regular expressions used in XML DTDs and XML Schema. This includes the definition of a suitable automaton model, and a generalization of the Glushkov construction. We demonstrate that, compared to their non-deterministic superclass, these deterministic regular expressions with back-references have desirable algorithmic properties (i.e., efficiently solvable membership problem and some decidable problems in static analysis), while, at the same time, their expressive power exceeds that of deterministic regular expressions without back-references.}, |
627 | 193 |
author = {D.D.~Freydenberger and M.L.~Schmid}, |
626 | 194 |
doi = {https://doi.org/10.1016/j.jcss.2019.04.001}, |
195 |
issn = {0022-0000}, |
|
196 |
journal = {Journal of Computer and System Sciences}, |
|
197 |
keywords = {Deterministic regular expression, Regex, Glushkov automaton}, |
|
198 |
pages = {1-39}, |
|
199 |
title = {Deterministic regular expressions with back-references}, |
|
200 |
url = {https://www.sciencedirect.com/science/article/pii/S0022000018301818}, |
|
201 |
volume = {105}, |
|
202 |
year = {2019}, |
|
203 |
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0022000018301818}, |
|
204 |
bdsk-url-2 = {https://doi.org/10.1016/j.jcss.2019.04.001}} |
|
624 | 205 |
|
626 | 206 |
@inproceedings{Frey2013, |
207 |
address = {Dagstuhl, Germany}, |
|
208 |
annote = {Keywords: extended regular expressions, regex, decidability, non-recursive tradeoffs}, |
|
627 | 209 |
author = {D.D.~Freydenberger}, |
626 | 210 |
booktitle = {28th International Symposium on Theoretical Aspects of Computer Science (STACS 2011)}, |
211 |
doi = {10.4230/LIPIcs.STACS.2011.507}, |
|
212 |
editor = {Thomas Schwentick and Christoph D{\"u}rr}, |
|
213 |
isbn = {978-3-939897-25-5}, |
|
214 |
issn = {1868-8969}, |
|
215 |
pages = {507--518}, |
|
216 |
publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik}, |
|
217 |
series = {Leibniz International Proceedings in Informatics (LIPIcs)}, |
|
218 |
title = {{Extended Regular Expressions: Succinctness and Decidability}}, |
|
219 |
url = {http://drops.dagstuhl.de/opus/volltexte/2011/3039}, |
|
220 |
urn = {urn:nbn:de:0030-drops-30396}, |
|
221 |
volume = {9}, |
|
222 |
year = {2011}, |
|
223 |
bdsk-url-1 = {http://drops.dagstuhl.de/opus/volltexte/2011/3039}, |
|
224 |
bdsk-url-2 = {https://doi.org/10.4230/LIPIcs.STACS.2011.507}} |
|
624 | 225 |
|
226 |
@article{campeanu2003formal, |
|
227 |
author = {C.~C{\^a}mpeanu and K.~Salomaa and S.~Yu}, |
|
228 |
journal = {International Journal of Foundations of Computer Science}, |
|
229 |
number = {06}, |
|
230 |
pages = {1007--1018}, |
|
231 |
publisher = {World Scientific}, |
|
232 |
title = {A formal study of practical regular expressions}, |
|
233 |
volume = {14}, |
|
234 |
year = {2003}} |
|
235 |
||
236 |
@article{campeanu2009patterns, |
|
626 | 237 |
author = {C.~C{\^a}mpeanu and N.~Santean}, |
238 |
doi = {10.1007/s00236-009-0090-y}, |
|
239 |
journal = {Acta Inf.}, |
|
240 |
month = {05}, |
|
241 |
pages = {193-207}, |
|
242 |
title = {On the closure of pattern expressions languages under intersection with regular languages}, |
|
243 |
volume = {46}, |
|
244 |
year = {2009}, |
|
245 |
bdsk-url-1 = {https://doi.org/10.1007/s00236-009-0090-y}} |
|
624 | 246 |
|
247 |
@article{CAMPEANU2009Intersect, |
|
248 |
abstract = {In this paper we revisit the semantics of extended regular expressions (regex), defined succinctly in the 90s [A.V. Aho, Algorithms for finding patterns in strings, in: Jan van Leeuwen (Ed.), Handbook of Theoretical Computer Science, in: Algorithms and Complexity, vol. A, Elsevier and MIT Press, 1990, pp. 255--300] and rigorously in 2003 by C{\^a}mpeanu, Salomaa and Yu [C. C{\^a}mpeanu, K. Salomaa, S. Yu, A formal study of practical regular expressions, IJFCS 14 (6) (2003) 1007--1018], when the authors reported an open problem, namely whether regex languages are closed under the intersection with regular languages. We give a positive answer; and for doing so, we propose a new class of machines --- regex automata systems (RAS) --- which are equivalent to regex. Among others, these machines provide a consistent and convenient method of implementing regex in practice. We also prove, as a consequence of this closure property, that several languages, such as the mirror language, the language of palindromes, and the language of balanced words are not regex languages.}, |
|
627 | 249 |
author = {C.~C{\^a}mpeanu and N.~Santean}, |
624 | 250 |
doi = {https://doi.org/10.1016/j.tcs.2009.02.022}, |
251 |
issn = {0304-3975}, |
|
252 |
journal = {Theoretical Computer Science}, |
|
253 |
keywords = {Extended regular expression, Regex automata system, Regex}, |
|
254 |
note = {Formal Languages and Applications: A Collection of Papers in Honor of Sheng Yu}, |
|
255 |
number = {24}, |
|
256 |
pages = {2336-2344}, |
|
257 |
title = {On the intersection of regex languages with regular languages}, |
|
258 |
url = {https://www.sciencedirect.com/science/article/pii/S0304397509001789}, |
|
259 |
volume = {410}, |
|
260 |
year = {2009}, |
|
261 |
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397509001789}, |
|
262 |
bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2009.02.022}} |
|
263 |
||
626 | 264 |
@incollection{Aho1990, |
265 |
abstract = {Publisher Summary |
|
266 |
This chapter discusses the algorithms for solving string-matching problems that have proven useful for text-editing and text-processing applications. String pattern matching is an important problem that occurs in many areas of science and information processing. In computing, it occurs naturally as part of data processing, text editing, term rewriting, lexical analysis, and information retrieval. Many text editors and programming languages have facilities for matching strings. In biology, string-matching problems arise in the analysis of nucleic acids and protein sequences, and in the investigation of molecular phylogeny. String matching is also one of the central and most widely studied problems in theoretical computer science. The simplest form of the problem is to locate an occurrence of a keyword as a substring in a sequence of characters, which is called the input string. For example, the input string queueing contains the keyword ueuei as a substring. Even for this problem, several innovative, theoretically interesting algorithms have been devised that run significantly faster than the obvious brute-force method.}, |
|
267 |
address = {Amsterdam}, |
|
268 |
author = {A.V.~Aho}, |
|
269 |
booktitle = {Algorithms and Complexity}, |
|
270 |
doi = {https://doi.org/10.1016/B978-0-444-88071-0.50010-2}, |
|
271 |
editor = {JAN {VAN LEEUWEN}}, |
|
272 |
isbn = {978-0-444-88071-0}, |
|
273 |
pages = {255-300}, |
|
274 |
publisher = {Elsevier}, |
|
275 |
series = {Handbook of Theoretical Computer Science}, |
|
276 |
title = {CHAPTER 5 - Algorithms for Finding Patterns in Strings}, |
|
277 |
url = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102}, |
|
278 |
year = {1990}, |
|
279 |
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102}, |
|
280 |
bdsk-url-2 = {https://doi.org/10.1016/B978-0-444-88071-0.50010-2}} |
|
625 | 281 |
|
626 | 282 |
@inproceedings{Might2011, |
283 |
abstract = {We present a functional approach to parsing unrestricted context-free grammars based on Brzozowski's derivative of regular expressions. If we consider context-free grammars as recursive regular expressions, Brzozowski's equational theory extends without modification to context-free grammars (and it generalizes to parser combinators). The supporting actors in this story are three concepts familiar to functional programmers - laziness, memoization and fixed points; these allow Brzozowski's original equations to be transliterated into purely functional code in about 30 lines spread over three functions.Yet, this almost impossibly brief implementation has a drawback: its performance is sour - in both theory and practice. The culprit? Each derivative can double the size of a grammar, and with it, the cost of the next derivative.Fortunately, much of the new structure inflicted by the derivative is either dead on arrival, or it dies after the very next derivative. To eliminate it, we once again exploit laziness and memoization to transliterate an equational theory that prunes such debris into working code. Thanks to this compaction, parsing times become reasonable in practice.We equip the functional programmer with two equational theories that, when combined, make for an abbreviated understanding and implementation of a system for parsing context-free languages.}, |
|
284 |
address = {New York, NY, USA}, |
|
285 |
author = {M.~Might and D.~Darais and D.~Spiewak}, |
|
286 |
booktitle = {Proceedings of the 16th ACM SIGPLAN International Conference on Functional Programming}, |
|
287 |
doi = {10.1145/2034773.2034801}, |
|
288 |
isbn = {9781450308656}, |
|
289 |
keywords = {derivative, parsing, context-free grammar, parser combinator, formal languages, regular expressions}, |
|
290 |
location = {Tokyo, Japan}, |
|
291 |
numpages = {7}, |
|
292 |
pages = {189--195}, |
|
293 |
publisher = {Association for Computing Machinery}, |
|
294 |
series = {ICFP '11}, |
|
295 |
title = {Parsing with Derivatives: A Functional Pearl}, |
|
296 |
url = {https://doi.org/10.1145/2034773.2034801}, |
|
297 |
year = {2011}, |
|
298 |
bdsk-url-1 = {https://doi.org/10.1145/2034773.2034801}} |
|
625 | 299 |
|
300 |
||
301 |
||
626 | 302 |
@inproceedings{Adams2016, |
303 |
title={On the complexity and performance of parsing with derivatives}, |
|
627 | 304 |
author={M.D.~Adams and C.~Hollenbeck and M.~MightMatthew}, |
626 | 305 |
booktitle={Proceedings of the 37th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
306 |
pages={224--236}, |
|
307 |
year={2016} |
|
625 | 308 |
} |
627 | 309 |
@article{Huet1997, |
310 |
author = {G.~Huet}, |
|
311 |
title = {The Zipper}, |
|
312 |
year = {1997}, |
|
313 |
issue_date = {September 1997}, |
|
314 |
publisher = {Cambridge University Press}, |
|
315 |
address = {USA}, |
|
316 |
volume = {7}, |
|
317 |
number = {5}, |
|
318 |
issn = {0956-7968}, |
|
319 |
url = {https://doi.org/10.1017/S0956796897002864}, |
|
320 |
doi = {10.1017/S0956796897002864}, |
|
321 |
abstract = {Almost every programmer has faced the problem of representing a tree together with a subtree that is the focus of attention, where that focus may move left, right, up or down the tree. The Zipper is Huet's nifty name for a nifty data structure which fulfills this need. I wish I had known of it when I faced this task, because the solution I came up with was not quite so efficient or elegant as the Zipper.}, |
|
322 |
journal = {J. Funct. Program.}, |
|
323 |
month = {sep}, |
|
324 |
pages = {549–554}, |
|
325 |
numpages = {6} |
|
326 |
} |
|
625 | 327 |
|
626 | 328 |
@article{Darragh2020, |
329 |
abstract = {Parsing with Derivatives (PwD) is an elegant approach to parsing context-free grammars (CFGs). It takes the equational theory behind Brzozowski's derivative for regular expressions and augments that theory with laziness, memoization, and fixed points. The result is a simple parser for arbitrary CFGs. Although recent work improved the performance of PwD, it remains inefficient due to the algorithm repeatedly traversing some parts of the grammar. In this functional pearl, we show how to avoid this inefficiency by suspending the state of the traversal in a zipper. When subsequent derivatives are taken, we can resume the traversal from where we left off without retraversing already traversed parts of the grammar. However, the original zipper is designed for use with trees, and we want to parse CFGs. CFGs can include shared regions, cycles, and choices between alternates, which makes them incompatible with the traditional tree model for zippers. This paper develops a generalization of zippers to properly handle these additional features. Just as PwD generalized Brzozowski's derivatives from regular expressions to CFGs, we generalize Huet's zippers from trees to CFGs. Abstract The resulting parsing algorithm is concise and efficient: it takes only 31 lines of OCaml code to implement the derivative function but performs 6,500 times faster than the original PwD and 3.24 times faster than the optimized implementation of PwD.}, |
|
330 |
address = {New York, NY, USA}, |
|
331 |
articleno = {108}, |
|
627 | 332 |
author = {P.~Darragh and M.D.~Adams}, |
626 | 333 |
doi = {10.1145/3408990}, |
334 |
issue_date = {August 2020}, |
|
335 |
journal = {Proc. ACM Program. Lang.}, |
|
336 |
keywords = {Derivatives, Zippers, Parsing with Derivatives, Parsing}, |
|
337 |
month = {aug}, |
|
338 |
number = {ICFP}, |
|
339 |
numpages = {28}, |
|
340 |
publisher = {Association for Computing Machinery}, |
|
341 |
title = {Parsing with Zippers (Functional Pearl)}, |
|
342 |
url = {https://doi.org/10.1145/3408990}, |
|
343 |
volume = {4}, |
|
344 |
year = {2020}, |
|
345 |
bdsk-url-1 = {https://doi.org/10.1145/3408990}} |
|
346 |
||
347 |
@article{Edelmann2021, |
|
348 |
abstract = {Parsing is the process that enables a computer system to make sense of raw data. Parsing is common to almost all computer systems: It is involved every time sequential data is read and elaborated into structured data. The theory of parsing usually focuses on the binary recognition aspect of parsing and eschews this essential data-elaboration aspect. In this thesis, I present a declarative framework for value-aware parsing that explicitly integrates data elaboration. |
|
625 | 349 |
Within the framework of the thesis, I present parsing algorithms that are based on the concept of Brzozowski's derivatives. Derivative-based parsing algorithms present several advantages: they are elegant, amenable to formal reasoning, and easy to implement. Unfortunately, the performance of these algorithms in practice is often not competitive with other approaches. In this thesis, I show a general technique inspired by Huet's Zipper to greatly enhance the performance of derivative-based algorithms, and I do so without compromising their elegance, amenability to formal reasoning, or ease of implementation. |
350 |
First, I present a technique for building efficient tokenisers that is based on Brzozowski's derivatives and Huet's zipper and that does not require the usual burdensome explicit conversion to automata. I prove the technique is correct in Coq and present SILEX, a Scala lexing library based on the technique. I demonstrate that the approach is competitive with state-of-the-art solutions. |
|
351 |
Then, I present a characterisation of LL(1) languages based on the concept of should-not-follow sets. I present an algorithm for parsing LL(1) languages with derivatives and zippers. I show a formal proof of the algorithm's correctness and prove its worst-case linear-time complexity. I show how the LL(1) parsing with derivatives and zippers algorithm corresponds to the traditional LL(1) parsing algorithm. |
|
352 |
I then present SCALL1ON, a Scala parsing combinators library for LL(1) languages that incorporates the LL(1) parsing with derivatives and zippers algorithm. I present an expressive and familiar combinator-based interface for describing LL(1) languages. I present techniques that help precisely locate LL(1) conflicts in user code. I discuss several advantages of the parsing with derivatives approach within the context of a parsing library. I also present SCALL1ON's enumeration and pretty-printing features and discuss their implementation. Through a series of benchmarks, I demonstrate the good performance and practicality of the approach. Finally, I present how to adapt the LL(1) parsing with derivatives and zippers algorithm to support arbitrary context-free languages. I show how the adapted algorithm corresponds to general parsing algorithms, such as Earley's parsing algorithm.}, |
|
626 | 353 |
address = {Lausanne}, |
627 | 354 |
author = {R.~Edelmann}, |
626 | 355 |
doi = {10.5075/epfl-thesis-7357}, |
356 |
institution = {IINFCOM}, |
|
357 |
pages = {246}, |
|
358 |
publisher = {EPFL}, |
|
359 |
title = {Efficient Parsing with Derivatives and Zippers}, |
|
360 |
url = {http://infoscience.epfl.ch/record/287059}, |
|
361 |
year = {2021}, |
|
362 |
bdsk-url-1 = {http://infoscience.epfl.ch/record/287059}, |
|
363 |
bdsk-url-2 = {https://doi.org/10.5075/epfl-thesis-7357}} |
|
625 | 364 |
|
627 | 365 |
|
366 |
||
367 |
||
625 | 368 |
@comment{Fixed reversed co-author names: the PLDI 2020 paper is by Edelmann, Hamza and Kuncak.}
@inproceedings{Zippy2020,
	author = {R.~Edelmann and J.~Hamza and V.~Kun{\v{c}}ak},
	title = {Zippy LL(1) Parsing with Derivatives},
	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI 2020},
	location = {London, UK},
	address = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	pages = {1036--1051},
	numpages = {16},
	year = {2020},
	isbn = {9781450376136},
	doi = {10.1145/3385412.3385992},
	url = {https://doi.org/10.1145/3385412.3385992},
	keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives},
	abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.},
	bdsk-url-1 = {https://doi.org/10.1145/3385412.3385992}}
|
385 |
||
386 |
@comment{Was @article with a space-garbled URL stored in the journal field; this is a web
  document, so use @misc with howpublished. URL reconstructed from the original fragments.}
@misc{fowler2003,
	author = {G.~Fowler},
	title = {An interpretation of the {POSIX} regex standard},
	year = {2003},
	howpublished = {\url{https://web.archive.org/web/20050408073627/http://www.research.att.com/~gsf/testregex/re-interpretation.html}}}
|
391 |
||
392 |
@inproceedings{Snort1999,
	author = {M.~Roesch},
	title = {Snort - Lightweight Intrusion Detection for Networks},
	booktitle = {Proceedings of the 13th USENIX Conference on System Administration},
	series = {LISA '99},
	location = {Seattle, Washington},
	address = {USA},
	publisher = {USENIX Association},
	pages = {229--238},
	numpages = {10},
	year = {1999},
	abstract = {Network intrusion detection systems (NIDS) are an important part of any network security architecture. They provide a layer of defense which monitors network traffic for predefined suspicious activity or patterns, and alert system administrators when potential hostile traffic is detected. Commercial NIDS have many differences, but Information Systems departments must face the commonalities that they share such as significant system footprint, complex deployment and high monetary cost. Snort was designed to address these issues.}}
|
404 |
||
405 |
||
627 | 406 |
@comment{Fixed: Unicode curly quotes and em-dashes replaced by LaTeX escapes (classic BibTeX
  is 8-bit only), page range uses -- instead of an en-dash character, month uses the jan..dec macro.}
@article{Reps1998,
	author = {T.~Reps},
	title = {``Maximal-Munch'' Tokenization in Linear Time},
	journal = {ACM Trans. Program. Lang. Syst.},
	volume = {20},
	number = {2},
	month = mar,
	year = {1998},
	issue_date = {March 1998},
	pages = {259--273},
	numpages = {15},
	issn = {0164-0925},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	doi = {10.1145/276393.276394},
	url = {https://doi.org/10.1145/276393.276394},
	keywords = {backtracking, tokenization, tabulation, dynamic programming, memoization},
	abstract = {The lexical-analysis (or scanning) phase of a compiler attempts to partition an input string into a sequence of tokens. The convention in most languages is that the input is scanned left to right, and each token identified is a ``maximal munch'' of the remaining input---the longest prefix of the remaining input that is a token of the language. Although most of the standard compiler textbooks present a way to perform maximal-munch tokenization, the algorithm they describe is one that, for certain sets of token definitions, can cause the scanner to exhibit quadratic behavior in the worst case. In the article, we show that maximal-munch tokenization can always be performed in time linear in the size of the input.}
}
|
626 | 425 |
|
426 |
@inproceedings{Bro,
	author = {V.~Paxson},
	title = {Bro: A System for Detecting Network Intruders in Real-Time},
	booktitle = {Proceedings of the 7th Conference on USENIX Security Symposium - Volume 7},
	series = {SSYM'98},
	location = {San Antonio, Texas},
	address = {USA},
	publisher = {USENIX Association},
	pages = {3},
	numpages = {1},
	year = {1998},
	abstract = {We describe Bro, a stand-alone system for detecting network intruders in real-time by passively monitoring a network link over which the intruder's traffic transits. We give an overview of the system's design, which emphasizes high-speed (FDDI-rate) monitoring, real-time notification, clear separation between mechanism and policy, and extensibility. To achieve these ends, Bro is divided into an "event engine" that reduces a kernel-filtered network traffic stream into a series of higher-level events, and a "policy script interpreter" that interprets event handlers written in a specialized language used to express a site's security policy. Event handlers can update state information, synthesize new events, record information to disk, and generate real-time notifications via syslog. We also discuss a number of attacks that attempt to subvert passive monitoring systems and defenses against these, and give particulars of how Bro analyzes the four applications integrated into it so far: Finger, FTP, Portmapper and Telnet. The system is publicly available in source code form.}
}
439 |
||
627 | 440 |
@comment{Fixed: removed empty volume/number/doi fields (BibTeX warns on empty fields),
  page range uses --, initials spaced (V.~K.), entry type lowercased per file convention.}
@inproceedings{Sidhu2001,
	author = {R.~Sidhu and V.~K.~Prasanna},
	title = {Fast Regular Expression Matching Using {FPGAs}},
	booktitle = {The 9th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM'01)},
	pages = {227--238},
	year = {2001}
}
|
450 |
@comment{Fixed trailing space in the author list and quoted month macro. NOTE(review):
  this entry and Turo_ov__2020 cite the same OOPSLA 2020 article under two keys --
  consolidate to one key once all \cite sites are known.}
@article{Turonova2020,
	author = {L.~Turo\v{n}ov\'{a} and L.~Hol\'{\i}k and O.~Leng\'{a}l and O.~Saarikivi and M.~Veanes and T.~Vojnar},
	title = {Regex Matching with Counting-Set Automata},
	journal = {Proc. ACM Program. Lang.},
	volume = {4},
	number = {OOPSLA},
	articleno = {218},
	numpages = {30},
	month = nov,
	year = {2020},
	issue_date = {November 2020},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	doi = {10.1145/3428286},
	url = {https://doi.org/10.1145/3428286},
	keywords = {counting-set automata, counting automata, bounded repetition, ReDos, Antimirov's derivatives, regular expression matching, determinization},
	internal-note = {duplicate of Turo_ov__2020 -- consolidate},
	abstract = {We propose a solution to the problem of efficient matching regular expressions (regexes) with bounded repetition, such as (ab){1,100}, using deterministic automata. For this, we introduce novel counting-set automata (CsAs), automata with registers that can hold sets of bounded integers and can be manipulated by a limited portfolio of constant-time operations. We present an algorithm that compiles a large sub-class of regexes to deterministic CsAs. This includes (1) a novel Antimirov-style translation of regexes with counting to counting automata (CAs), nondeterministic automata with bounded counters, and (2) our main technical contribution, a determinization of CAs that outputs CsAs. The main advantage of this workflow is that the size of the produced CsAs does not depend on the repetition bounds used in the regex (while the size of the DFA is exponential to them). Our experimental results confirm that deterministic CsAs produced from practical regexes with repetition are indeed vastly smaller than the corresponding DFAs. More importantly, our prototype matcher based on CsA simulation handles practical regexes with repetition regardless of sizes of counter bounds. It easily copes with regexes with repetition where state-of-the-art matchers struggle.}
}
|
468 |
||
469 |
@comment{Fixed: en-dash in pages replaced by --, and scraping damage in the abstract
  repaired ("owever" -> "However", "equirements" -> "requirements" twice,
  "ostffective" -> "cost-effective").}
@inproceedings{Kumar2006,
	author = {S.~Kumar and S.~Dharmapurikar and F.~Yu and P.~Crowley and J.~Turner},
	title = {Algorithms to Accelerate Multiple Regular Expressions Matching for Deep Packet Inspection},
	booktitle = {Proceedings of the 2006 Conference on Applications, Technologies, Architectures, and Protocols for Computer Communications},
	series = {SIGCOMM '06},
	location = {Pisa, Italy},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	pages = {339--350},
	numpages = {12},
	year = {2006},
	isbn = {1595933085},
	doi = {10.1145/1159913.1159952},
	url = {https://doi.org/10.1145/1159913.1159952},
	keywords = {regular expressions, DFA, deep packet inspection},
	abstract = {There is a growing demand for network devices capable of examining the content of data packets in order to improve network security and provide application-specific services. Most high performance systems that perform deep packet inspection implement simple string matching algorithms to match packets against a large, but finite set of strings. However, there is growing interest in the use of regular expression-based pattern matching, since regular expressions offer superior expressive power and flexibility. Deterministic finite automata (DFA) representations are typically used to implement regular expressions. However, DFA representations of regular expression sets arising in network applications require large amounts of memory, limiting their practical application.In this paper, we introduce a new representation for regular expressions, called the Delayed Input DFA (D2FA), which substantially reduces space requirements as compared to a DFA. A D2FA is constructed by transforming a DFA via incrementally replacing several transitions of the automaton with a single default transition. Our approach dramatically reduces the number of distinct transitions between states. For a collection of regular expressions drawn from current commercial and academic systems, a D2FA representation reduces transitions by more than 95%. Given the substantially reduced space requirements, we describe an efficient architecture that can perform deep packet inspection at multi-gigabit rates. Our architecture uses multiple on-chip memories in such a way that each remains uniformly occupied and accessed over a short duration, thus effectively distributing the load and enabling high throughput. Our architecture can provide cost-effective packet content scanning at OC-192 rates with memory requirements that are consistent with current ASIC technology.}
}
|
625 | 486 |
|
487 |
||
627 | 488 |
|
489 |
||
490 |
||
491 |
@comment{Fixed: en-dash in the pages field replaced by the BibTeX page-range --.}
@inproceedings{Becchi2007,
	author = {M.~Becchi and P.~Crowley},
	title = {An Improved Algorithm to Accelerate Regular Expression Evaluation},
	booktitle = {Proceedings of the 3rd ACM/IEEE Symposium on Architecture for Networking and Communications Systems},
	series = {ANCS '07},
	location = {Orlando, Florida, USA},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	pages = {145--154},
	numpages = {10},
	year = {2007},
	isbn = {9781595939456},
	doi = {10.1145/1323548.1323573},
	url = {https://doi.org/10.1145/1323548.1323573},
	keywords = {regular expressions, DFA, deep packet inspection},
	abstract = {Modern network intrusion detection systems need to perform regular expression matching at line rate in order to detect the occurrence of critical patterns in packet payloads. While deterministic finite automata (DFAs) allow this operation to be performed in linear time, they may exhibit prohibitive memory requirements. In [9], Kumar et al. propose Delayed Input DFAs (D2FAs), which provide a trade-off between the memory requirements of the compressed DFA and the number of states visited for each character processed, which corresponds directly to the memory bandwidth required to evaluate regular expressions.In this paper we introduce a general compression technique that results in at most 2N state traversals when processing a string of length N. In comparison to the D2FA approach, our technique achieves comparable levels of compression, with lower provable bounds on memory bandwidth (or greater compression for a given bandwidth bound). Moreover, our proposed algorithm has lower complexity, is suitable for scenarios where a compressed DFA needs to be dynamically built or updated, and fosters locality in the traversal process. Finally, we also describe a novel alphabet reduction scheme for DFA-based structures that can yield further dramatic reductions in data structure size.}
}
|
508 |
||
626 | 509 |
@comment{Fixed the broken author list "{M.~Becchi and , Patrick}" -- the co-author of this
  CoNEXT 2008 paper (doi 10.1145/1544012.1544037) is Patrick Crowley. Added the missing
  required booktitle and dropped the bogus "month = {01}".}
@inproceedings{Becchi08,
	author = {M.~Becchi and P.~Crowley},
	title = {Extending finite automata to efficiently match Perl-compatible regular expressions},
	booktitle = {Proceedings of the 2008 ACM CoNEXT Conference},
	pages = {25},
	year = {2008},
	doi = {10.1145/1544012.1544037},
	bdsk-url-1 = {https://doi.org/10.1145/1544012.1544037}}
|
517 |
||
518 |
@comment{Fixed the garbled "editor = {Thomas, ReubenTranslator}": Reuben Thomas is the
  translator, which classic BibTeX expresses in the note field. Non-standard "place"
  field renamed to the standard "address".}
@book{Sakarovitch2009,
	author = {J.~Sakarovitch},
	title = {Elements of Automata Theory},
	publisher = {Cambridge University Press},
	address = {Cambridge},
	year = {2009},
	note = {Translated by Reuben Thomas},
	doi = {10.1017/CBO9781139195218},
	bdsk-url-1 = {https://doi.org/10.1017/CBO9781139195218}}
|
527 |
||
628 | 528 |
@comment{Braced POSIX in the title so sentence-casing styles do not downcase the acronym.}
@unpublished{FoSSaCS2023,
	author = {C.~Tan and C.~Urban},
	title = {{POSIX} Lexing with Bitcoded Derivatives},
	note = {submitted}}
|
532 |
||
533 |
@comment{Fixed single-hyphen page range to the BibTeX -- form.}
@inproceedings{Verbatim,
	author = {D.~Egolf and S.~Lasser and K.~Fisher},
	title = {Verbatim: A Verified Lexer Generator},
	booktitle = {2021 IEEE Security and Privacy Workshops (SPW)},
	pages = {92--100},
	year = {2021},
	doi = {10.1109/SPW53761.2021.00022},
	bdsk-url-1 = {https://doi.org/10.1109/SPW53761.2021.00022}}
|
625 | 541 |
|
542 |
||
626 | 543 |
@inproceedings{Nipkow1998,
	author = {T.~Nipkow},
	title = {Verified Lexical Analysis},
	editor = {Jim Grundy and Malcolm C. Newey},
	booktitle = {Theorem Proving in Higher Order Logics, 11th International Conference, TPHOLs'98, Canberra, Australia, September 27 - October 1, 1998, Proceedings},
	series = {Lecture Notes in Computer Science},
	volume = {1479},
	pages = {1--15},
	publisher = {Springer},
	year = {1998},
	url = {https://doi.org/10.1007/BFb0055126},
	doi = {10.1007/BFb0055126},
	timestamp = {Tue, 14 May 2019 10:00:48 +0200},
	biburl = {https://dblp.org/rec/conf/tphol/Nipkow98.bib},
	bibsource = {dblp computer science bibliography, https://dblp.org}
}
561 |
||
562 |
||
601 | 563 |
@inproceedings{Verbatimpp,
	author = {D.~Egolf and S.~Lasser and K.~Fisher},
	title = {Verbatim++: Verified, Optimized, and Semantically Rich Lexing with Derivatives},
	booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
	series = {CPP 2022},
	location = {Philadelphia, PA, USA},
	address = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	pages = {27--39},
	numpages = {13},
	year = {2022},
	isbn = {9781450391825},
	doi = {10.1145/3497775.3503694},
	url = {https://doi.org/10.1145/3497775.3503694},
	keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
	abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
	bdsk-url-1 = {https://doi.org/10.1145/3497775.3503694}}
|
518 | 580 |
|
581 |
@comment{Fixed: quoted month macro, and URLs stored with a literal %2F escape (a raw % in a
  classic BibTeX field can comment out the rest of a line when emitted into LaTeX) replaced
  by the canonical DOI resolver URL. NOTE(review): duplicate of entry Turonova2020 --
  consolidate to a single key.}
@article{Turo_ov__2020,
	author = {L.~Turo{\v{n}}ov{\'{a}} and L.~Hol{\'{\i}}k and O.~Leng{\'{a}}l and O.~Saarikivi and M.~Veanes and T.~Vojnar},
	title = {Regex matching with counting-set automata},
	journal = {Proceedings of the {ACM} on Programming Languages},
	volume = {4},
	number = {{OOPSLA}},
	pages = {1--30},
	month = nov,
	year = {2020},
	publisher = {Association for Computing Machinery ({ACM})},
	doi = {10.1145/3428286},
	url = {https://doi.org/10.1145/3428286},
	date-added = {2022-05-23 18:43:04 +0100},
	date-modified = {2022-05-23 18:43:04 +0100},
	internal-note = {duplicate of Turonova2020 -- consolidate},
	bdsk-url-1 = {https://doi.org/10.1145/3428286}}
|
597 |
||
468 | 598 |
@comment{NOTE(review): this entry and 17Bir are the same arXiv preprint (1405.7058) under
  two keys with different years -- consolidate to a single key.}
@article{Rathnayake2014StaticAF,
	author = {A.~Rathnayake and H.~Thielecke},
	title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
	journal = {ArXiv},
	volume = {abs/1405.7058},
	year = {2014},
	internal-note = {duplicate of 17Bir -- consolidate}}
|
468 | 604 |
|
472 | 605 |
@comment{Was @inproceedings without the required booktitle field (BibTeX warning). The work
  is a 2017 Stellenbosch University master's thesis, so use @mastersthesis with school --
  TODO confirm institution against the cited copy.}
@mastersthesis{Weideman2017Static,
	author = {N.~Weideman},
	title = {Static analysis of regular expressions},
	school = {Stellenbosch University},
	year = {2017}}
|
472 | 609 |
|
468 | 610 |
@inproceedings{RibeiroAgda2017,
	author = {R.~Ribeiro and A.~D.~Bois},
	title = {Certified Bit-Coded Regular Expression Parsing},
	booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages},
	series = {SBLP 2017},
	location = {Fortaleza, CE, Brazil},
	address = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	articleno = {4},
	numpages = {8},
	year = {2017},
	isbn = {9781450353892},
	doi = {10.1145/3125374.3125381},
	url = {https://doi.org/10.1145/3125374.3125381},
	keywords = {Certified algorithms, regular expressions, dependent types, bit-codes},
	date-modified = {2022-03-16 16:38:47 +0000},
	abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.},
	bdsk-url-1 = {https://doi.org/10.1145/3125374.3125381}}
|
628 |
||
629 |
@comment{Fixed: quoted month macro, and the %2F-escaped resolver URL (a raw % in a classic
  BibTeX field is hazardous when emitted into LaTeX) replaced by the canonical DOI URL.}
@article{Thompson_1968,
	author = {Ken Thompson},
	title = {Programming Techniques: Regular expression search algorithm},
	journal = {Communications of the {ACM}},
	volume = {11},
	number = {6},
	pages = {419--422},
	month = jun,
	year = {1968},
	publisher = {Association for Computing Machinery ({ACM})},
	doi = {10.1145/363347.363387},
	url = {https://doi.org/10.1145/363347.363387},
	date-added = {2022-02-23 13:44:42 +0000},
	date-modified = {2022-02-23 13:44:42 +0000},
	bdsk-url-1 = {https://doi.org/10.1145/363347.363387}}
|
645 |
||
646 |
@comment{NOTE(review): same arXiv preprint (1405.7058) as entry Rathnayake2014StaticAF,
  under a different key and year -- consolidate to a single key.}
@article{17Bir,
	author = {Asiri Rathnayake and Hayo Thielecke},
	title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
	journal = {arXiv:1405.7058},
	year = {2017},
	date-added = {2019-08-18 17:57:30 +0000},
	date-modified = {2019-08-18 18:00:13 +0000},
	internal-note = {duplicate of Rathnayake2014StaticAF -- consolidate}}
|
518 | 653 |
|
471 | 654 |
@comment{Fixed the mangled author "{Alfred, V}": this is Alfred V. Aho's handbook chapter;
  the given name had been parsed as a surname. "Algorithms and Complexity" is the
  containing volume, not a journal, so use @incollection/booktitle.}
@incollection{alfred2014algorithms,
	author = {Aho, Alfred V.},
	title = {Algorithms for finding patterns in strings},
	booktitle = {Algorithms and Complexity},
	volume = {1},
	pages = {255},
	publisher = {Elsevier},
	year = {2014}}
|
471 | 662 |
|
468 | 663 |
@comment{Fixed the author field: BibTeX separates multiple authors with " and ", never a
  comma; "{Lasse Nielsen, Fritz Henglein}" was parsed as a single garbled name.}
@article{nielson11bcre,
	author = {Nielsen, Lasse and Henglein, Fritz},
	title = {Bit-coded Regular Expression Parsing},
	journal = {LATA},
	year = {2011},
	date-added = {2019-07-03 21:09:39 +0000},
	date-modified = {2019-07-03 21:17:33 +0000},
	bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
|
518 | 671 |
|
468 | 672 |
@comment{Fixed the author field: authors must be separated by " and ", not a comma.}
@software{regexploit2021,
	author = {B.~Caller and L.~Carettoni},
	title = {regexploit},
	url = {https://github.com/doyensec/regexploit},
	keywords = {ReDos static analyser},
	month = {May},
	year = {2021},
	date-added = {2020-11-24 00:00:00 +0000},
	date-modified = {2021-05-07 00:00:00 +0000},
	bdsk-url-1 = {https://github.com/doyensec/regexploit}}
|
682 |
||
608 | 683 |
@comment{Fixed "month = {June}" to the unquoted three-letter BibTeX month macro.}
@software{pcre,
	author = {P.~Hazel},
	title = {PCRE},
	url = {https://www.pcre.org/original/doc/html/},
	keywords = {Perl Compatible Regular Expressions},
	month = jun,
	year = {2021},
	date-added = {1997-01-01 00:00:00 +0000},
	date-modified = {2021-06-14 00:00:00 +0000},
	bdsk-url-1 = {https://www.pcre.org/original/doc/html/}}
693 |
||
694 |
@misc{communityRules,
	title = {{Snort Community Rules}},
	howpublished = {\url{https://www.snort.org/faq/what-are-community-rules}},
	year = {2022},
	note = {[Online; last accessed 19-November-2022]}}
|
608 | 699 |
|
668
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
700 |
@comment{NOTE(review): this entry and atomEditor both cite the Atom editor under different
  keys/URLs -- consider consolidating to a single key.}
@misc{Atom,
	title = {{Atom Editor}},
	howpublished = {\url{https://github.com/atom/atom}},
	year = {2023},
	note = {[Online; last accessed 22-Aug-2023]},
	internal-note = {duplicate of atomEditor -- consolidate}}
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
705 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
706 |
@comment{Fixed the note field: "{[Online; last accessed 22-Aug-2023}}" had an unbalanced
  brace (closing the entry early and corrupting parsing of the rest of the file) and a
  missing closing bracket.}
@misc{fixedAtom,
	title = {{How I fixed Atom}},
	howpublished = {\url{http://davidvgalbraith.com/how-i-fixed-atom/}},
	year = {2016},
	note = {[Online; last accessed 22-Aug-2023]}}
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
711 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
712 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
713 |
@comment{Fixed the note field: unbalanced brace and missing closing bracket
  ("...22-Aug-2023}}" -> "...22-Aug-2023]}").}
@misc{Cloudflare,
	title = {{Cloudflare}},
	howpublished = {\url{https://www.cloudflare.com/en-gb/}},
	year = {2023},
	note = {[Online; last accessed 22-Aug-2023]}}
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
718 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
719 |
@comment{Fixed the note field's unbalanced brace / missing bracket, and gave this entry a
  distinct title -- it previously duplicated the title of the Cloudflare entry even though
  it cites the July 2019 outage post-mortem blog post.}
@misc{CloudflareOutage,
	title = {{Details of the Cloudflare Outage on July 2, 2019}},
	howpublished = {\url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}},
	year = {2023},
	note = {[Online; last accessed 22-Aug-2023]}}
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
724 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
725 |
@comment{Fixed: "%" is not a comment character in BibTeX, so the "%note = {...}" line was
  stray text inside the entry. The disabled note is preserved in an ignored field
  (unknown field names are skipped by BibTeX) instead.}
@misc{TwainRegex,
	author = {Zoltan Herczeg},
	title = {{Performance comparison of regular expression engines}},
	howpublished = {\url{https://zherczeg.github.io/sljit/regex_perf.html}},
	year = {2015},
	internal-note = {[Online; last accessed 19-November-2022]}}
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
731 |
|
471 | 732 |
@misc{KuklewiczHaskell,
	author = {C.~Kuklewicz},
	title = {Regex Posix},
	url = {https://wiki.haskell.org/Regex_Posix},
	keywords = {Buggy C POSIX Lexing Libraries},
	year = {2017},
	bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}}
|
739 |
||
472 | 740 |
@comment{Fixed reversed author name: regex101 is maintained by Firas Dib; "{D.~Firas}"
  had the given name in surname position.}
@misc{regex101,
	author = {F.~Dib},
	title = {regex101},
	url = {https://regex101.com/},
	keywords = {regex tester debugger},
	year = {2011},
	bdsk-url-1 = {https://regex101.com/}}
|
747 |
||
500 | 748 |
@comment{NOTE(review): this entry and Atom both cite the Atom editor under different
  keys/URLs -- consider consolidating to a single key.}
@misc{atomEditor,
	author = {N.~Sobo},
	title = {Atom Editor},
	url = {https://atom.io},
	keywords = {text editor},
	year = {2022},
	internal-note = {duplicate of Atom -- consolidate},
	bdsk-url-1 = {https://atom.io}}
755 |
||
471 | 756 |
@comment{Fixed: the words "Technical report," were baked into the institution field;
  @techreport already labels the entry as a technical report (override via the type
  field if a different label is wanted).}
@techreport{grathwohl2014crash,
	author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen},
	title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types},
	institution = {University of Copenhagen},
	year = {2014}}
|
471 | 761 |
|
602 | 762 |
@inproceedings{xml2015, |
626 | 763 |
abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.}, |
764 |
address = {New York, NY, USA}, |
|
628 | 765 |
author = {H.~Bj{\"o}rklund and W.~Martens and T.~Timm}, |
626 | 766 |
booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management}, |
767 |
doi = {10.1145/2806416.2806434}, |
|
768 |
isbn = {9781450337946}, |
|
769 |
keywords = {regular expressions, schema, regular path queries, xml}, |
|
770 |
location = {Melbourne, Australia}, |
|
771 |
numpages = {10}, |
|
772 |
pages = {1541--1550}, |
|
773 |
publisher = {Association for Computing Machinery}, |
|
774 |
series = {CIKM '15}, |
|
775 |
title = {Efficient Incremental Evaluation of Succinct Regular Expressions}, |
|
776 |
url = {https://doi.org/10.1145/2806416.2806434}, |
|
777 |
year = {2015}, |
|
778 |
bdsk-url-1 = {https://doi.org/10.1145/2806416.2806434}} |
|
602 | 779 |
|
468 | 780 |
@misc{SE16, |
781 |
author = {StackStatus}, |
|
782 |
date-added = {2019-06-26 11:28:41 +0000}, |
|
783 |
date-modified = {2019-06-26 16:07:31 +0000}, |
|
784 |
keywords = {ReDos Attack}, |
|
785 |
month = jul, |
|
786 |
rating = {5}, |
|
787 |
read = {1}, |
|
788 |
title = {Stack Overflow Outage Postmortem}, |
|
789 |
url = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}, |
|
790 |
year = {2016}, |
|
791 |
bdsk-url-1 = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}} |
|
792 |
||
793 |
@article{HosoyaVouillonPierce2005, |
|
794 |
author = {H.~Hosoya and J.~Vouillon and B.~C.~Pierce}, |
|
795 |
journal = {ACM Transactions on Programming Languages and Systems (TOPLAS)}, |
|
796 |
number = 1, |
|
797 |
pages = {46--90}, |
|
798 |
title = {{R}egular {E}xpression {T}ypes for {XML}}, |
|
799 |
volume = 27, |
|
800 |
year = {2005}} |
|
801 |
||
802 |
@misc{POSIX, |
|
803 |
note = {\url{http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html}}, |
|
804 |
title = {{T}he {O}pen {G}roup {B}ase {S}pecification {I}ssue 6 {IEEE} {S}td 1003.1 2004 {E}dition}, |
|
805 |
year = {2004}} |
|
806 |
||
807 |
@inproceedings{AusafDyckhoffUrban2016, |
|
808 |
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, |
|
809 |
booktitle = {Proc.~of the 7th International Conference on Interactive Theorem Proving (ITP)}, |
|
810 |
pages = {69--86}, |
|
811 |
series = {LNCS}, |
|
812 |
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions ({P}roof {P}earl)}, |
|
813 |
volume = {9807}, |
|
814 |
year = {2016}} |
|
815 |
||
816 |
@article{aduAFP16, |
|
817 |
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban}, |
|
818 |
issn = {2150-914x}, |
|
819 |
journal = {Archive of Formal Proofs}, |
|
820 |
note = {\url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}, Formal proof development}, |
|
821 |
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions}, |
|
822 |
year = 2016} |
|
823 |
||
824 |
@techreport{CrashCourse2014, |
|
825 |
annote = {draft report}, |
|
826 |
author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen}, |
|
827 |
institution = {University of Copenhagen}, |
|
828 |
title = {{A} {C}rash-{C}ourse in {R}egular {E}xpression {P}arsing and {R}egular {E}xpressions as {T}ypes}, |
|
829 |
year = {2014}} |
|
830 |
||
831 |
@inproceedings{Sulzmann2014, |
|
832 |
author = {M.~Sulzmann and K.~Lu}, |
|
833 |
booktitle = {Proc.~of the 12th International Conference on Functional and Logic Programming (FLOPS)}, |
|
834 |
pages = {203--220}, |
|
835 |
series = {LNCS}, |
|
836 |
title = {{POSIX} {R}egular {E}xpression {P}arsing with {D}erivatives}, |
|
837 |
volume = {8475}, |
|
838 |
year = {2014}} |
|
839 |
||
840 |
@inproceedings{Sulzmann2014b, |
|
841 |
author = {M.~Sulzmann and P.~van Steenhoven}, |
|
842 |
booktitle = {Proc.~of the 23rd International Conference on Compiler Construction (CC)}, |
|
843 |
pages = {174--191}, |
|
844 |
series = {LNCS}, |
|
845 |
title = {{A} {F}lexible and {E}fficient {ML} {L}exer {T}ool {B}ased on {E}xtended {R}egular {E}xpression {S}ubmatching}, |
|
846 |
volume = {8409}, |
|
847 |
year = {2014}} |
|
848 |
||
849 |
@book{Pierce2015, |
|
850 |
author = {B.~C.~Pierce and C.~Casinghino and M.~Gaboardi and M.~Greenberg and C.~Hri\c{t}cu and V.~Sj\"{o}berg and B.~Yorgey}, |
|
851 |
note = {\url{http://www.cis.upenn.edu/~bcpierce/sf}}, |
|
852 |
publisher = {Electronic textbook}, |
|
853 |
title = {{S}oftware {F}oundations}, |
|
854 |
year = {2015}} |
|
855 |
||
856 |
@misc{Kuklewicz, |
|
857 |
author = {C.~Kuklewicz}, |
|
858 |
howpublished = {\url{https://wiki.haskell.org/Regex_Posix}}, |
|
859 |
title = {{R}egex {P}osix}} |
|
860 |
||
861 |
@article{Vansummeren2006, |
|
862 |
author = {S.~Vansummeren}, |
|
863 |
journal = {ACM Transactions on Programming Languages and Systems}, |
|
864 |
number = {3}, |
|
865 |
pages = {389--428}, |
|
866 |
title = {{T}ype {I}nference for {U}nique {P}attern {M}atching}, |
|
867 |
volume = {28}, |
|
868 |
year = {2006}} |
|
869 |
||
870 |
@inproceedings{Asperti12, |
|
871 |
author = {A.~Asperti}, |
|
872 |
booktitle = {Proc.~of the 3rd International Conference on Interactive Theorem Proving (ITP)}, |
|
873 |
pages = {283--298}, |
|
874 |
series = {LNCS}, |
|
875 |
title = {{A} {C}ompact {P}roof of {D}ecidability for {R}egular {E}xpression {E}quivalence}, |
|
876 |
volume = {7406}, |
|
877 |
year = {2012}} |
|
878 |
||
879 |
@inproceedings{Frisch2004, |
|
880 |
author = {A.~Frisch and L.~Cardelli}, |
|
881 |
booktitle = {Proc.~of the 31st International Conference on Automata, Languages and Programming (ICALP)}, |
|
882 |
pages = {618--629}, |
|
883 |
series = {LNCS}, |
|
884 |
title = {{G}reedy {R}egular {E}xpression {M}atching}, |
|
885 |
volume = {3142}, |
|
886 |
year = {2004}} |
|
887 |
||
888 |
@article{Antimirov95, |
|
889 |
author = {V.~Antimirov}, |
|
890 |
journal = {Theoretical Computer Science}, |
|
891 |
pages = {291--319}, |
|
892 |
title = {{P}artial {D}erivatives of {R}egular {E}xpressions and {F}inite {A}utomata {C}onstructions}, |
|
893 |
volume = {155}, |
|
894 |
year = {1995}} |
|
895 |
||
896 |
@inproceedings{Nipkow98, |
|
897 |
author = {T.~Nipkow}, |
|
898 |
booktitle = {Proc.~of the 11th International Conference on Theorem Proving in Higher Order Logics (TPHOLs)}, |
|
899 |
pages = {1--15}, |
|
900 |
series = {LNCS}, |
|
901 |
title = {{V}erified {L}exical {A}nalysis}, |
|
902 |
volume = 1479, |
|
903 |
year = 1998} |
|
904 |
||
668
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
905 |
@book{Isabelle, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
906 |
title={Isabelle/HOL: a proof assistant for higher-order logic}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
907 |
author={Nipkow, Tobias and Paulson, Lawrence C and Wenzel, Markus}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
908 |
volume={2283}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
909 |
year={2002}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
910 |
publisher={Springer Science \& Business Media} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
911 |
} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
912 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
913 |
|
468 | 914 |
@article{Brzozowski1964, |
915 |
author = {J.~A.~Brzozowski}, |
|
916 |
journal = {Journal of the {ACM}}, |
|
917 |
number = {4}, |
|
918 |
pages = {481--494}, |
|
919 |
title = {{D}erivatives of {R}egular {E}xpressions}, |
|
920 |
volume = {11}, |
|
921 |
year = {1964}} |
|
922 |
||
923 |
@article{Leroy2009, |
|
924 |
author = {X.~Leroy}, |
|
925 |
journal = {Communications of the ACM}, |
|
926 |
number = 7, |
|
927 |
pages = {107--115}, |
|
928 |
title = {{F}ormal {V}erification of a {R}ealistic {C}ompiler}, |
|
929 |
volume = 52, |
|
930 |
year = 2009} |
|
931 |
||
932 |
||
933 |
@article{Wu2014, |
|
934 |
author = {C.~Wu and X.~Zhang and C.~Urban}, |
|
935 |
journal = {Journal of Automated Reasoning}, |
|
936 |
number = {4}, |
|
937 |
pages = {451--480}, |
|
938 |
title = {{A} {F}ormalisation of the {M}yhill-{N}erode {T}heorem based on {R}egular {E}xpressions}, |
|
939 |
volume = {52}, |
|
940 |
year = {2014}} |
|
941 |
||
942 |
@inproceedings{Regehr2011, |
|
943 |
author = {X.~Yang and Y.~Chen and E.~Eide and J.~Regehr}, |
|
944 |
booktitle = {Proc.~of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI)}, |
|
945 |
pages = {283--294}, |
|
946 |
title = {{F}inding and {U}nderstanding {B}ugs in {C} {C}ompilers}, |
|
947 |
year = {2011}} |
|
948 |
||
949 |
@article{Norrish2014, |
|
950 |
author = {A.~Barthwal and M.~Norrish}, |
|
951 |
journal = {Journal of Computer and System Sciences}, |
|
952 |
number = {2}, |
|
953 |
pages = {346--362}, |
|
954 |
title = {{A} {M}echanisation of {S}ome {C}ontext-{F}ree {L}anguage {T}heory in {HOL4}}, |
|
955 |
volume = {80}, |
|
956 |
year = {2014}} |
|
957 |
||
958 |
@article{Thompson1968, |
|
959 |
author = {K.~Thompson}, |
|
960 |
issue_date = {June 1968}, |
|
961 |
journal = {Communications of the ACM}, |
|
962 |
number = {6}, |
|
963 |
pages = {419--422}, |
|
964 |
title = {{P}rogramming {T}echniques: {R}egular {E}xpression {S}earch {A}lgorithm}, |
|
965 |
volume = {11}, |
|
966 |
year = {1968}} |
|
967 |
||
968 |
@article{Owens2009, |
|
969 |
author = {S.~Owens and J.~H.~Reppy and A.~Turon}, |
|
970 |
journal = {Journal of Functional Programming}, |
|
971 |
number = {2}, |
|
972 |
pages = {173--190}, |
|
973 |
title = {{R}egular-{E}xpression {D}erivatives {R}e-{E}xamined}, |
|
974 |
volume = {19}, |
|
975 |
year = {2009}} |
|
976 |
||
977 |
@inproceedings{Sulzmann2015, |
|
978 |
author = {M.~Sulzmann and P.~Thiemann}, |
|
979 |
booktitle = {Proc.~of the 9th International Conference on Language and Automata Theory and Applications (LATA)}, |
|
980 |
pages = {275--286}, |
|
981 |
series = {LNCS}, |
|
982 |
title = {{D}erivatives for {R}egular {S}huffle {E}xpressions}, |
|
983 |
volume = {8977}, |
|
984 |
year = {2015}} |
|
985 |
||
986 |
@inproceedings{Chen2012, |
|
987 |
author = {H.~Chen and S.~Yu}, |
|
988 |
booktitle = {Proc.~of the International Workshop on Theoretical Computer Science (WTCS)}, |
|
989 |
pages = {343--356}, |
|
990 |
series = {LNCS}, |
|
991 |
title = {{D}erivatives of {R}egular {E}xpressions and an {A}pplication}, |
|
992 |
volume = {7160}, |
|
993 |
year = {2012}} |
|
994 |
||
995 |
@article{Krauss2011, |
|
996 |
author = {A.~Krauss and T.~Nipkow}, |
|
997 |
journal = {Journal of Automated Reasoning}, |
|
998 |
pages = {95--106}, |
|
999 |
title = {{P}roof {P}earl: {R}egular {E}xpression {E}quivalence and {R}elation {A}lgebra}, |
|
1000 |
volume = 49, |
|
1001 |
year = 2012} |
|
1002 |
||
1003 |
@inproceedings{Traytel2015, |
|
1004 |
author = {D.~Traytel}, |
|
1005 |
booktitle = {Proc.~of the 24th Annual Conference on Computer Science Logic (CSL)}, |
|
1006 |
pages = {487--503}, |
|
1007 |
series = {LIPIcs}, |
|
1008 |
title = {{A} {C}oalgebraic {D}ecision {P}rocedure for {WS1S}}, |
|
1009 |
volume = {41}, |
|
1010 |
year = {2015}} |
|
1011 |
||
1012 |
@inproceedings{Traytel2013, |
|
1013 |
author = {D.~Traytel and T.~Nipkow}, |
|
1014 |
booktitle = {Proc.~of the 18th ACM SIGPLAN International Conference on Functional Programming (ICFP)}, |
|
1015 |
pages = {3--12}, |
|
1016 |
title = {{A} {V}erified {D}ecision {P}rocedure for {MSO} on {W}ords {B}ased on {D}erivatives of {R}egular {E}xpressions}, |
|
1017 |
year = 2013} |
|
1018 |
||
1019 |
@inproceedings{Coquand2012, |
|
1020 |
author = {T.~Coquand and V.~Siles}, |
|
1021 |
booktitle = {Proc.~of the 1st International Conference on Certified Programs and Proofs (CPP)}, |
|
1022 |
pages = {119--134}, |
|
1023 |
series = {LNCS}, |
|
1024 |
title = {{A} {D}ecision {P}rocedure for {R}egular {E}xpression {E}quivalence in {T}ype {T}heory}, |
|
1025 |
volume = {7086}, |
|
1026 |
year = {2011}} |
|
1027 |
||
1028 |
@inproceedings{Almeidaetal10, |
|
1029 |
author = {J.~B.~Almeida and N.~Moreira and D.~Pereira and S.~M.~de Sousa}, |
|
1030 |
booktitle = {Proc.~of the 15th International Conference on Implementation and Application of Automata (CIAA)}, |
|
1031 |
pages = {59--68}, |
|
1032 |
series = {LNCS}, |
|
1033 |
title = {{P}artial {D}erivative {A}utomata {F}ormalized in {C}oq}, |
|
1034 |
volume = {6482}, |
|
1035 |
year = {2010}} |
|
1036 |
||
1037 |
@article{Owens2008, |
|
1038 |
author = {S.~Owens and K.~Slind}, |
|
1039 |
journal = {Higher-Order and Symbolic Computation}, |
|
1040 |
number = {4}, |
|
1041 |
pages = {377--409}, |
|
1042 |
title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic}, |
|
1043 |
volume = {21}, |
|
1044 |
year = {2008}} |
|
1045 |
||
1046 |
@article{Owens2, |
|
1047 |
author = {S.~Owens and K.~Slind}, |
|
1048 |
bibsource = {dblp computer science bibliography, http://dblp.org}, |
|
1049 |
biburl = {http://dblp.uni-trier.de/rec/bib/journals/lisp/OwensS08}, |
|
1050 |
doi = {10.1007/s10990-008-9038-0}, |
|
1051 |
journal = {Higher-Order and Symbolic Computation}, |
|
1052 |
number = {4}, |
|
1053 |
pages = {377--409}, |
|
1054 |
timestamp = {Wed, 16 Dec 2009 13:51:02 +0100}, |
|
1055 |
title = {Adapting functional programs to higher order logic}, |
|
1056 |
url = {http://dx.doi.org/10.1007/s10990-008-9038-0}, |
|
1057 |
volume = {21}, |
|
1058 |
year = {2008}, |
|
1059 |
bdsk-url-1 = {http://dx.doi.org/10.1007/s10990-008-9038-0}} |
|
1060 |
||
1061 |
@misc{PCRE, |
|
1062 |
title = {{PCRE - Perl Compatible Regular Expressions}}, |
|
1063 |
url = {http://www.pcre.org}, |
|
1064 |
bdsk-url-1 = {http://www.pcre.org}} |
|
1065 |
||
1066 |
@inproceedings{Davis18, |
|
1067 |
author = {J.~C.~Davis and C.~A.~Coghlan and F.~Servant and D.~Lee}, |
|
1068 |
booktitle = {Proc.~of the 26th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)}, |
|
1069 |
numpages = {11}, |
|
1070 |
pages = {246--256}, |
|
1071 |
title = {{T}he {I}mpact of {R}egular {E}xpression {D}enial of {S}ervice ({ReDoS}) in {P}ractice: {A}n {E}mpirical {S}tudy at the {E}cosystem {S}cale}, |
|
1072 |
year = {2018}} |
|
634 | 1073 |
|
1074 |
||
1075 |
@misc{grep, |
|
1076 |
title = {GNU grep}, |
|
636 | 1077 |
url = {https://www.gnu.org/software/grep/manual/grep.html} |
634 | 1078 |
} |
636 | 1079 |
|
1080 |
@misc{GOregexp, |
|
1081 |
title = {GO regexp package documentation}, |
|
1082 |
url = {https://pkg.go.dev/regexp#pkg-overview} |
|
1083 |
} |
|
664 | 1084 |
|
1085 |
||
1086 |
@inproceedings{atkey2010amortised, |
|
1087 |
title={Amortised resource analysis with separation logic}, |
|
1088 |
author={Atkey, Robert}, |
|
1089 |
booktitle={European Symposium on Programming}, |
|
1090 |
pages={85--103}, |
|
1091 |
year={2010}, |
|
1092 |
organization={Springer} |
|
1093 |
} |
|
1094 |
||
1095 |
%10.1007/978-3-319-89884-1_19, |
|
1096 |
||
1097 |
@InProceedings{bigOImperative, |
|
1098 |
author="Gu{\'e}neau, Arma{\"e}l |
|
1099 |
and Chargu{\'e}raud, Arthur |
|
1100 |
and Pottier, Fran{\c{c}}ois", |
|
1101 |
editor="Ahmed, Amal", |
|
1102 |
title="A Fistful of Dollars: Formalizing Asymptotic Complexity Claims via Deductive Program Verification", |
|
1103 |
booktitle="Programming Languages and Systems", |
|
1104 |
year="2018", |
|
1105 |
publisher="Springer International Publishing", |
|
1106 |
address="Cham", |
|
1107 |
pages="533--560", |
|
1108 |
abstract="We present a framework for simultaneously verifying the functional correctness and the worst-case asymptotic time complexity of higher-order imperative programs. We build on top of Separation Logic with Time Credits, embedded in an interactive proof assistant. We formalize the O notation, which is key to enabling modular specifications and proofs. We cover the subtleties of the multivariate case, where the complexity of a program fragment depends on multiple parameters. We propose a way of integrating complexity bounds into specifications, present lemmas and tactics that support a natural reasoning style, and illustrate their use with a collection of examples.", |
|
1109 |
isbn="978-3-319-89884-1" |
|
1110 |
} |
|
668
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1111 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1112 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1113 |
@article{ValiantParsing, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1114 |
title={General Context-Free Recognition in Less than Cubic Time}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1115 |
author={Leslie G. Valiant}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1116 |
journal={J. Comput. Syst. Sci.}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1117 |
year={1975}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1118 |
volume={10}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1119 |
pages={308--315} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1120 |
} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1121 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1122 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1123 |
@inproceedings{bigOIsabelle, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1124 |
title={Formalizing O notation in Isabelle/HOL}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1125 |
author={Avigad, Jeremy and Donnelly, Kevin}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1126 |
booktitle={International Joint Conference on Automated Reasoning}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1127 |
pages={357--371}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1128 |
year={2004}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1129 |
organization={Springer} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1130 |
} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1131 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1132 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1133 |
@inbook{subtypingFormalComplexity, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1134 |
author = {Bessai, Jan and Rehof, Jakob and D{\"u}dder, Boris}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1135 |
year = {2019}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1136 |
month = {06}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1137 |
pages = {356-371}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1138 |
title = {Fast Verified BCD Subtyping}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1139 |
isbn = {978-3-030-22347-2}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1140 |
doi = {10.1007/978-3-030-22348-9_21} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1141 |
} |
664 | 1142 |
%@article{10.1145/640128.604148, |
1143 |
%author = {Hofmann, Martin and Jost, Steffen}, |
|
1144 |
%title = {Static Prediction of Heap Space Usage for First-Order Functional Programs}, |
|
1145 |
%year = {2003}, |
|
1146 |
%issue_date = {January 2003}, |
|
1147 |
%publisher = {Association for Computing Machinery}, |
|
1148 |
%address = {New York, NY, USA}, |
|
1149 |
%volume = {38}, |
|
1150 |
%number = {1}, |
|
1151 |
%issn = {0362-1340}, |
|
1152 |
%url = {https://doi.org/10.1145/640128.604148}, |
|
1153 |
%doi = {10.1145/640128.604148}, |
|
1154 |
%abstract = {We show how to efficiently obtain linear a priori bounds on the heap space consumption of first-order functional programs.The analysis takes space reuse by explicit deallocation into account and also furnishes an upper bound on the heap usage in the presence of garbage collection. It covers a wide variety of examples including, for instance, the familiar sorting algorithms for lists, including quicksort.The analysis relies on a type system with resource annotations. Linear programming (LP) is used to automatically infer derivations in this enriched type system.We also show that integral solutions to the linear programs derived correspond to programs that can be evaluated without any operating system support for memory management. The particular integer linear programs arising in this way are shown to be feasibly solvable under mild assumptions.}, |
|
1155 |
%journal = {SIGPLAN Not.}, |
|
1156 |
%month = {jan}, |
|
1157 |
%pages = {185–197}, |
|
1158 |
%numpages = {13}, |
|
1159 |
%keywords = {heap, functional programming, program analysis, resources, garbage collection} |
|
1160 |
%} |
|
1161 |
% |
|
1162 |
%@inproceedings{10.1145/604131.604148, |
|
1163 |
%author = {Hofmann, Martin and Jost, Steffen}, |
|
1164 |
%title = {Static Prediction of Heap Space Usage for First-Order Functional Programs}, |
|
1165 |
%year = {2003}, |
|
1166 |
%isbn = {1581136285}, |
|
1167 |
%publisher = {Association for Computing Machinery}, |
|
1168 |
%address = {New York, NY, USA}, |
|
1169 |
%url = {https://doi.org/10.1145/604131.604148}, |
|
1170 |
%doi = {10.1145/604131.604148}, |
|
1171 |
%abstract = {We show how to efficiently obtain linear a priori bounds on the heap space consumption of first-order functional programs.The analysis takes space reuse by explicit deallocation into account and also furnishes an upper bound on the heap usage in the presence of garbage collection. It covers a wide variety of examples including, for instance, the familiar sorting algorithms for lists, including quicksort.The analysis relies on a type system with resource annotations. Linear programming (LP) is used to automatically infer derivations in this enriched type system.We also show that integral solutions to the linear programs derived correspond to programs that can be evaluated without any operating system support for memory management. The particular integer linear programs arising in this way are shown to be feasibly solvable under mild assumptions.}, |
|
1172 |
%booktitle = {Proceedings of the 30th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages}, |
|
1173 |
%pages = {185–197}, |
|
1174 |
%numpages = {13}, |
|
1175 |
%keywords = {resources, heap, garbage collection, program analysis, functional programming}, |
|
1176 |
%location = {New Orleans, Louisiana, USA}, |
|
1177 |
%series = {POPL '03} |
|
1178 |
%} |
|
1179 |
||
1180 |
||
668
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1181 |
%10.1145/3591262, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1182 |
|
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1183 |
@article{Margus2023PLDI, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1184 |
author = {Moseley, Dan and Nishio, Mario and Perez Rodriguez, Jose and Saarikivi, Olli and Toub, Stephen and Veanes, Margus and Wan, Tiki and Xu, Eric}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1185 |
title = {Derivative Based Nonbacktracking Real-World Regex Matching with Backtracking Semantics}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1186 |
year = {2023}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1187 |
issue_date = {June 2023}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1188 |
publisher = {Association for Computing Machinery}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1189 |
address = {New York, NY, USA}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1190 |
volume = {7}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1191 |
number = {PLDI}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1192 |
url = {https://doi.org/10.1145/3591262}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1193 |
doi = {10.1145/3591262}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1194 |
abstract = {We develop a new derivative based theory and algorithm for nonbacktracking regex matching that supports anchors and counting, preserves backtracking semantics, and can be extended with lookarounds. The algorithm has been implemented as a new regex backend in .NET and was extensively tested as part of the formal release process of .NET7. We present a formal proof of the correctness of the algorithm, which we believe to be the first of its kind concerning industrial implementations of regex matchers. The paper describes the complete foundation, the matching algorithm, and key aspects of the implementation involving a regex rewrite system, as well as a comprehensive evaluation over industrial case studies and other regex engines.}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1195 |
journal = {Proc. ACM Program. Lang.}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1196 |
month = jun, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1197 |
articleno = {148}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1198 |
numpages = {24}, |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1199 |
keywords = {automata, derivative, symbolic, regex, PCRE, matching} |
3831621d7b14
added technical Overview section, almost done introduction
Chengsong
parents:
664
diff
changeset
|
1200 |
} |