468
+ − 1
%% This BibTeX bibliography file was created using BibDesk.
+ − 2
%% https://bibdesk.sourceforge.io/
+ − 3
518
+ − 4
%% Created for CS TAN at 2022-05-23 18:43:50 +0100
468
+ − 5
+ − 6
+ − 7
%% Saved with string encoding Unicode (UTF-8)
607
+ − 8
608
+ − 9
621
+ − 10
@article{Murugesan2014,
+ − 11
author = {N.~Murugesan and O.~V.~Shanmuga Sundaram},
+ − 12
title = {{S}ome {P}roperties of {B}rzozowski {D}erivatives of {R}egular {E}xpressions},
+ − 13
journal = {International Journal of Computer Trends and Technology},
+ − 14
volume = {13},
+ − 15
number = {1},
+ − 16
year = {2014},
+ − 17
url = {http://arxiv.org/abs/1407.5902},
+ − 18
pages = {29--33}
+ − 19
}
+ − 20
+ − 21
@PhdThesis{Ausaf,
+ − 22
author = {F.~Ausaf},
+ − 23
title = {{V}erified {L}exing and {P}arsing},
+ − 24
school = {King's College London},
+ − 25
year = {2018}
+ − 26
}
608
+ − 27
609
+ − 28
%% POSIX specification------------------------
+ − 29
@InProceedings{Okui10,
622
+ − 30
author=" S.~Okui
+ − 31
and T.~Suzuki",
609
+ − 32
editor="Domaratzki, Michael
+ − 33
and Salomaa, Kai",
+ − 34
title="Disambiguation in Regular Expression Matching via Position Automata with Augmented Transitions",
+ − 35
booktitle="Implementation and Application of Automata",
+ − 36
year="2011",
+ − 37
publisher="Springer Berlin Heidelberg",
+ − 38
address="Berlin, Heidelberg",
+ − 39
pages="231--240",
+ − 40
abstract="This paper offers a new efficient regular expression matching algorithm which follows the POSIX-type leftmost-longest rule. The algorithm basically emulates the subset construction without backtracking, so that its computational cost even in the worst case does not explode exponentially; the time complexity of the algorithm is O(mn(n{\thinspace}+{\thinspace}c)), where m is the length of a given input string, n the number of occurrences of the most frequently used letter in a given regular expression and c the number of subexpressions to be used for capturing substrings. A formalization of the leftmost-longest semantics by using parse trees is also discussed.",
+ − 41
isbn="978-3-642-18098-9"
+ − 42
}
+ − 43
+ − 44
%% POSIX specification------------------------
+ − 45
+ − 46
%% Brzozowski ders------------------------
+ − 47
@article{Berglund14,
622
+ − 48
author = {M.~Berglund, F.~Drewes and B.~Van Der Merwe},
609
+ − 49
year = {2014},
+ − 50
month = {05},
+ − 51
pages = {},
+ − 52
title = {Analyzing Catastrophic Backtracking Behavior in Practical Regular Expression Matching},
+ − 53
volume = {151},
+ − 54
journal = {Electronic Proceedings in Theoretical Computer Science},
+ − 55
doi = {10.4204/EPTCS.151.7}
+ − 56
}
+ − 57
+ − 58
@InProceedings{Berglund18,
622
+ − 59
author="M.~Berglund
609
+ − 60
and Bester, Willem
+ − 61
and van der Merwe, Brink",
+ − 62
editor="Fischer, Bernd
+ − 63
and Uustalu, Tarmo",
+ − 64
title="Formalising Boost POSIX Regular Expression Matching",
+ − 65
booktitle="Theoretical Aspects of Computing -- ICTAC 2018",
+ − 66
year="2018",
+ − 67
publisher="Springer International Publishing",
+ − 68
address="Cham",
+ − 69
pages="99--115",
+ − 70
abstract="Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.",
+ − 71
isbn="978-3-030-02508-3"
+ − 72
}
+ − 73
+ − 74
+ − 75
@inproceedings{Chen12,
+ − 76
author = {Chen, Haiming and Yu, Sheng},
+ − 77
year = {2012},
+ − 78
month = {01},
+ − 79
pages = {343-356},
+ − 80
title = {Derivatives of Regular Expressions and an Application},
+ − 81
volume = {7160},
+ − 82
doi = {10.1007/978-3-642-27654-5_27}
+ − 83
}
+ − 84
+ − 85
+ − 86
+ − 87
%% Brzozowski ders------------------------
622
+ − 88
%@article{Murugesan2014,
+ − 89
% author = {N.~Murugesan and O.~V.~Shanmuga Sundaram},
+ − 90
% title = {{S}ome {P}roperties of {B}rzozowski {D}erivatives of {R}egular {E}xpressions},
+ − 91
% journal = {International Journal of Computer Trends and Technology},
+ − 92
% volume = {13},
+ − 93
% number = {1},
+ − 94
% year = {2014},
+ − 95
% url = {http://arxiv.org/abs/1407.5902},
+ − 96
% pages = {29--33}
+ − 97
%}
609
+ − 98
+ − 99
%% look-aheads------------------------
+ − 100
@article{Takayuki2019,
+ − 101
title={Derivatives of Regular Expressions with Lookahead},
+ − 102
author={Takayuki Miyazaki and Yasuhiko Minamide},
+ − 103
journal={Journal of Information Processing},
+ − 104
volume={27},
+ − 105
number={ },
+ − 106
pages={422-430},
+ − 107
year={2019},
+ − 108
doi={10.2197/ipsjjip.27.422}
+ − 109
}
+ − 110
+ − 111
%% look-aheads------------------------
+ − 112
+ − 113
608
+ − 114
624
+ − 115
%% -------------------------------------
608
+ − 116
%% back-references--------------------
624
+ − 117
@article{FERNAU2015287,
+ − 118
title = {Pattern matching with variables: A multivariate complexity analysis},
+ − 119
journal = {Information and Computation},
+ − 120
volume = {242},
+ − 121
pages = {287-305},
+ − 122
year = {2015},
+ − 123
issn = {0890-5401},
+ − 124
doi = {https://doi.org/10.1016/j.ic.2015.03.006},
+ − 125
url = {https://www.sciencedirect.com/science/article/pii/S0890540115000218},
+ − 126
author = {H.~Fernau and M.L.~Schmid},
+ − 127
keywords = {Parameterised pattern matching, Function matching, NP-completeness, Membership problem for pattern languages, Morphisms},
+ − 128
abstract = {A pattern α, i.e., a string that contains variables and terminals, matches a terminal word w if w can be obtained by uniformly substituting the variables of α by terminal words. Deciding whether a given terminal word matches a given pattern is NP-complete and this holds for several natural variants of the problem that result from whether or not variables can be erased, whether or not the patterns are required to be terminal-free or whether or not the mapping of variables to terminal words must be injective. We consider numerous parameters of this problem (i.e., number of variables, length of w, length of the words substituted for variables, number of occurrences per variable, cardinality of the terminal alphabet) and for all possible combinations of the parameters (and variants described above), we answer the question whether or not the problem is still NP-complete if these parameters are bounded by constants.}
+ − 129
}
+ − 130
+ − 131
@inproceedings{Schmid2012,
+ − 132
author = {M.L.~Schmid},
+ − 133
title = {Inside the Class of REGEX Languages},
+ − 134
year = {2012},
+ − 135
isbn = {9783642316524},
+ − 136
publisher = {Springer-Verlag},
+ − 137
address = {Berlin, Heidelberg},
+ − 138
url = {https://doi.org/10.1007/978-3-642-31653-1_8},
+ − 139
doi = {10.1007/978-3-642-31653-1_8},
+ − 140
abstract = {We study different possibilities of combining the concept of homomorphic replacement with regular expressions in order to investigate the class of languages given by extended regular expressions with backreferences (REGEX). It is shown in which regard existing and natural ways to do this fail to reach the expressive power of REGEX. Furthermore, the complexity of the membership problem for REGEX with a bounded number of backreferences is considered.},
+ − 141
booktitle = {Proceedings of the 16th International Conference on Developments in Language Theory},
+ − 142
pages = {73–84},
+ − 143
numpages = {12},
+ − 144
keywords = {extended regular expressions, pattern languages, REGEX, pattern expressions, homomorphic replacement},
+ − 145
location = {Taipei, Taiwan},
+ − 146
series = {DLT'12}
+ − 147
}
+ − 148
+ − 149
+ − 150
+ − 151
@article{BERGLUND2022,
+ − 152
title = {Re-examining regular expressions with backreferences},
+ − 153
journal = {Theoretical Computer Science},
+ − 154
year = {2022},
+ − 155
issn = {0304-3975},
+ − 156
doi = {https://doi.org/10.1016/j.tcs.2022.10.041},
+ − 157
url = {https://www.sciencedirect.com/science/article/pii/S0304397522006570},
+ − 158
author = {Martin Berglund and Brink {van der Merwe}},
+ − 159
keywords = {Regular expressions, Backreferences},
+ − 160
abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.}
608
+ − 161
}
+ − 162
624
+ − 163
@article{FREYDENBERGER20191,
+ − 164
title = {Deterministic regular expressions with back-references},
+ − 165
journal = {Journal of Computer and System Sciences},
+ − 166
volume = {105},
+ − 167
pages = {1-39},
+ − 168
year = {2019},
+ − 169
issn = {0022-0000},
+ − 170
doi = {https://doi.org/10.1016/j.jcss.2019.04.001},
+ − 171
url = {https://www.sciencedirect.com/science/article/pii/S0022000018301818},
+ − 172
author = {Dominik D. Freydenberger and Markus L. Schmid},
+ − 173
keywords = {Deterministic regular expression, Regex, Glushkov automaton},
+ − 174
abstract = {Most modern libraries for regular expression matching allow back-references (i.e., repetition operators) that substantially increase expressive power, but also lead to intractability. In order to find a better balance between expressiveness and tractability, we combine these with the notion of determinism for regular expressions used in XML DTDs and XML Schema. This includes the definition of a suitable automaton model, and a generalization of the Glushkov construction. We demonstrate that, compared to their non-deterministic superclass, these deterministic regular expressions with back-references have desirable algorithmic properties (i.e., efficiently solvable membership problem and some decidable problems in static analysis), while, at the same time, their expressive power exceeds that of deterministic regular expressions without back-references.}
+ − 175
}
+ − 176
@InProceedings{Frey2013,
+ − 177
author = {Dominik D. Freydenberger},
+ − 178
title = {{Extended Regular Expressions: Succinctness and Decidability}},
+ − 179
booktitle = {28th International Symposium on Theoretical Aspects of Computer Science (STACS 2011) },
+ − 180
pages = {507--518},
+ − 181
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
+ − 182
ISBN = {978-3-939897-25-5},
+ − 183
ISSN = {1868-8969},
+ − 184
year = {2011},
+ − 185
volume = {9},
+ − 186
editor = {Thomas Schwentick and Christoph D{\"u}rr},
+ − 187
publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
+ − 188
address = {Dagstuhl, Germany},
+ − 189
URL = {http://drops.dagstuhl.de/opus/volltexte/2011/3039},
+ − 190
URN = {urn:nbn:de:0030-drops-30396},
+ − 191
doi = {10.4230/LIPIcs.STACS.2011.507},
+ − 192
annote = {Keywords: extended regular expressions, regex, decidability, non-recursive tradeoffs}
+ − 193
}
+ − 194
+ − 195
+ − 196
+ − 197
%% -------------------------- campeanu related
+ − 198
@article{campeanu2003formal,
+ − 199
author = {C.~C{\^a}mpeanu and K.~Salomaa and S.~Yu},
+ − 200
journal = {International Journal of Foundations of Computer Science},
+ − 201
number = {06},
+ − 202
pages = {1007--1018},
+ − 203
publisher = {World Scientific},
+ − 204
title = {A formal study of practical regular expressions},
+ − 205
volume = {14},
+ − 206
year = {2003}}
+ − 207
+ − 208
@article{campeanu2009patterns,
+ − 209
author = {C.~C{\^a}mpeanu and N.~Santean},
+ − 210
year = {2009},
+ − 211
month = {05},
+ − 212
pages = {193-207},
+ − 213
title = {On the closure of pattern expressions languages under intersection with regular languages},
+ − 214
volume = {46},
+ − 215
journal = {Acta Inf.},
+ − 216
doi = {10.1007/s00236-009-0090-y}
+ − 217
}
+ − 218
+ − 219
@article{CAMPEANU2009Intersect,
+ − 220
abstract = {In this paper we revisit the semantics of extended regular expressions (regex), defined succinctly in the 90s [A.V. Aho, Algorithms for finding patterns in strings, in: Jan van Leeuwen (Ed.), Handbook of Theoretical Computer Science, in: Algorithms and Complexity, vol. A, Elsevier and MIT Press, 1990, pp. 255--300] and rigorously in 2003 by C{\^a}mpeanu, Salomaa and Yu [C. C{\^a}mpeanu, K. Salomaa, S. Yu, A formal study of practical regular expressions, IJFCS 14 (6) (2003) 1007--1018], when the authors reported an open problem, namely whether regex languages are closed under the intersection with regular languages. We give a positive answer; and for doing so, we propose a new class of machines --- regex automata systems (RAS) --- which are equivalent to regex. Among others, these machines provide a consistent and convenient method of implementing regex in practice. We also prove, as a consequence of this closure property, that several languages, such as the mirror language, the language of palindromes, and the language of balanced words are not regex languages.},
+ − 221
author = {Cezar C{\^a}mpeanu and Nicolae Santean},
+ − 222
doi = {https://doi.org/10.1016/j.tcs.2009.02.022},
+ − 223
issn = {0304-3975},
+ − 224
journal = {Theoretical Computer Science},
+ − 225
keywords = {Extended regular expression, Regex automata system, Regex},
+ − 226
note = {Formal Languages and Applications: A Collection of Papers in Honor of Sheng Yu},
+ − 227
number = {24},
+ − 228
pages = {2336-2344},
+ − 229
title = {On the intersection of regex languages with regular languages},
+ − 230
url = {https://www.sciencedirect.com/science/article/pii/S0304397509001789},
+ − 231
volume = {410},
+ − 232
year = {2009},
+ − 233
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397509001789},
+ − 234
bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2009.02.022}}
+ − 235
608
+ − 236
%% back-references--------------------
624
+ − 237
%% -------------------------------------
608
+ − 238
607
+ − 239
@article{fowler2003,
+ − 240
title={An interpretation of the POSIX regex standard},
+ − 241
author={Fowler, Glenn},
+ − 242
journal={URL: https://web. archive. org/web/20050408073627/http://www. research. att. com/\~{} gsf/testregex/re-interpretation. html},
+ − 243
year={2003}
+ − 244
}
+ − 245
+ − 246
+ − 247
@inproceedings{Snort1999,
+ − 248
author = {Roesch, Martin},
+ − 249
title = {Snort - Lightweight Intrusion Detection for Networks},
+ − 250
year = {1999},
+ − 251
publisher = {USENIX Association},
+ − 252
address = {USA},
+ − 253
abstract = {Network intrusion detection systems (NIDS) are an important part of any network security architecture. They provide a layer of defense which monitors network traffic for predefined suspicious activity or patterns, and alert system administrators when potential hostile traffic is detected. Commercial NIDS have many differences, but Information Systems departments must face the commonalities that they share such as significant system footprint, complex deployment and high monetary cost. Snort was designed to address these issues.},
+ − 254
booktitle = {Proceedings of the 13th USENIX Conference on System Administration},
+ − 255
pages = {229–238},
+ − 256
numpages = {10},
+ − 257
location = {Seattle, Washington},
+ − 258
series = {LISA '99}
+ − 259
}
+ − 260
606
+ − 261
@inproceedings{Becchi08,
+ − 262
author = {Becchi, Michela and Crowley, Patrick},
+ − 263
year = {2008},
+ − 264
month = {01},
+ − 265
pages = {25},
+ − 266
title = {Extending finite automata to efficiently match Perl-compatible regular expressions},
+ − 267
doi = {10.1145/1544012.1544037}
+ − 268
}
603
+ − 269
605
+ − 270
@book{
+ − 271
Sakarovitch2009,
+ − 272
place={Cambridge},
+ − 273
title={Elements of Automata Theory},
+ − 274
DOI={10.1017/CBO9781139195218},
+ − 275
publisher={Cambridge University Press},
+ − 276
author={Sakarovitch, Jacques},
+ − 277
editor={Thomas, ReubenTranslator},
+ − 278
year={2009}
+ − 279
}
+ − 280
+ − 281
607
+ − 282
@unpublished{CSL2022,
603
+ − 283
author = "Chengsong Tan and Christian Urban",
+ − 284
title = "POSIX Lexing with Bitcoded Derivatives",
+ − 285
note = "submitted",
+ − 286
}
+ − 287
601
+ − 288
@INPROCEEDINGS{Verbatim, author={Egolf, Derek and Lasser, Sam and Fisher, Kathleen}, booktitle={2021 IEEE Security and Privacy Workshops (SPW)}, title={Verbatim: A Verified Lexer Generator}, year={2021}, volume={}, number={}, pages={92-100}, doi={10.1109/SPW53761.2021.00022}}
468
+ − 289
518
+ − 290
601
+ − 291
@inproceedings{Verbatimpp,
+ − 292
author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
+ − 293
title = {Verbatim++: Verified, Optimized, and Semantically Rich Lexing with Derivatives},
+ − 294
year = {2022},
+ − 295
isbn = {9781450391825},
+ − 296
publisher = {Association for Computing Machinery},
+ − 297
address = {New York, NY, USA},
+ − 298
url = {https://doi.org/10.1145/3497775.3503694},
+ − 299
doi = {10.1145/3497775.3503694},
+ − 300
abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
+ − 301
booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
+ − 302
pages = {27–39},
+ − 303
numpages = {13},
+ − 304
keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
+ − 305
location = {Philadelphia, PA, USA},
+ − 306
series = {CPP 2022}
+ − 307
}
+ − 308
518
+ − 309
+ − 310
@article{Turo_ov__2020,
+ − 311
author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar},
+ − 312
date-added = {2022-05-23 18:43:04 +0100},
+ − 313
date-modified = {2022-05-23 18:43:04 +0100},
+ − 314
doi = {10.1145/3428286},
+ − 315
journal = {Proceedings of the {ACM} on Programming Languages},
+ − 316
month = {nov},
+ − 317
number = {{OOPSLA}},
+ − 318
pages = {1--30},
+ − 319
publisher = {Association for Computing Machinery ({ACM})},
+ − 320
title = {Regex matching with counting-set automata},
+ − 321
url = {https://doi.org/10.1145%2F3428286},
+ − 322
volume = {4},
+ − 323
year = 2020,
+ − 324
bdsk-url-1 = {https://doi.org/10.1145%2F3428286},
+ − 325
bdsk-url-2 = {https://doi.org/10.1145/3428286}}
+ − 326
468
+ − 327
@article{Rathnayake2014StaticAF,
518
+ − 328
author = {Asiri Rathnayake and Hayo Thielecke},
+ − 329
journal = {ArXiv},
+ − 330
title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
+ − 331
volume = {abs/1405.7058},
+ − 332
year = {2014}}
468
+ − 333
472
+ − 334
@inproceedings{Weideman2017Static,
518
+ − 335
author = {Nicolaas Weideman},
+ − 336
title = {Static analysis of regular expressions},
+ − 337
year = {2017}}
472
+ − 338
468
+ − 339
@inproceedings{RibeiroAgda2017,
+ − 340
abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.},
+ − 341
address = {New York, NY, USA},
+ − 342
articleno = {4},
+ − 343
author = {Ribeiro, Rodrigo and Bois, Andr\'{e} Du},
+ − 344
booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages},
+ − 345
date-modified = {2022-03-16 16:38:47 +0000},
+ − 346
doi = {10.1145/3125374.3125381},
+ − 347
isbn = {9781450353892},
+ − 348
keywords = {Certified algorithms, regular expressions, dependent types, bit-codes},
+ − 349
location = {Fortaleza, CE, Brazil},
+ − 350
numpages = {8},
+ − 351
publisher = {Association for Computing Machinery},
+ − 352
series = {SBLP 2017},
+ − 353
title = {Certified Bit-Coded Regular Expression Parsing},
+ − 354
url = {https://doi.org/10.1145/3125374.3125381},
+ − 355
year = {2017},
+ − 356
bdsk-url-1 = {https://doi.org/10.1145/3125374.3125381}}
+ − 357
+ − 358
@article{Thompson_1968,
+ − 359
author = {Ken Thompson},
+ − 360
date-added = {2022-02-23 13:44:42 +0000},
+ − 361
date-modified = {2022-02-23 13:44:42 +0000},
+ − 362
doi = {10.1145/363347.363387},
+ − 363
journal = {Communications of the {ACM}},
+ − 364
month = {jun},
+ − 365
number = {6},
+ − 366
pages = {419--422},
+ − 367
publisher = {Association for Computing Machinery ({ACM})},
+ − 368
title = {Programming Techniques: Regular expression search algorithm},
+ − 369
url = {https://doi.org/10.1145%2F363347.363387},
+ − 370
volume = {11},
+ − 371
year = 1968,
+ − 372
bdsk-url-1 = {https://doi.org/10.1145%2F363347.363387},
+ − 373
bdsk-url-2 = {https://doi.org/10.1145/363347.363387}}
+ − 374
+ − 375
@article{17Bir,
+ − 376
author = {Asiri Rathnayake and Hayo Thielecke},
+ − 377
date-added = {2019-08-18 17:57:30 +0000},
+ − 378
date-modified = {2019-08-18 18:00:13 +0000},
+ − 379
journal = {arXiv:1405.7058},
+ − 380
title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
+ − 381
year = {2017}}
518
+ − 382
471
+ − 383
+ − 384
@article{alfred2014algorithms,
518
+ − 385
author = {Alfred, V},
+ − 386
journal = {Algorithms and Complexity},
+ − 387
pages = {255},
+ − 388
publisher = {Elsevier},
+ − 389
title = {Algorithms for finding patterns in strings},
+ − 390
volume = {1},
+ − 391
year = {2014}}
471
+ − 392
624
+ − 393
468
+ − 394
@article{nielson11bcre,
+ − 395
author = {Lasse Nielsen, Fritz Henglein},
+ − 396
date-added = {2019-07-03 21:09:39 +0000},
+ − 397
date-modified = {2019-07-03 21:17:33 +0000},
+ − 398
journal = {LATA},
+ − 399
title = {Bit-coded Regular Expression Parsing},
+ − 400
year = {2011},
+ − 401
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
518
+ − 402
468
+ − 403
@software{regexploit2021,
+ − 404
author = {Ben Caller, Luca Carettoni},
+ − 405
date-added = {2020-11-24 00:00:00 +0000},
+ − 406
date-modified = {2021-05-07 00:00:00 +0000},
+ − 407
keywords = {ReDos static analyser},
472
+ − 408
month = {May},
+ − 409
title = {regexploit},
+ − 410
url = {https://github.com/doyensec/regexploit},
518
+ − 411
year = {2021},
+ − 412
bdsk-url-1 = {https://github.com/doyensec/regexploit}}
+ − 413
608
+ − 414
@software{pcre,
+ − 415
author = {Philip Hazel},
+ − 416
date-added = {1997-01-01 00:00:00 +0000},
+ − 417
date-modified = {2021-06-14 00:00:00 +0000},
+ − 418
keywords = {Perl Compatible Regular Expressions},
+ − 419
month = {June},
+ − 420
title = {PCRE},
+ − 421
url = {https://www.pcre.org/original/doc/html/},
+ − 422
year = {2021},
+ − 423
bdsk-url-1 = {https://www.pcre.org/original/doc/html/}
+ − 424
}
+ − 425
471
+ − 426
@misc{KuklewiczHaskell,
+ − 427
author = {Kuklewicz},
+ − 428
keywords = {Buggy C POSIX Lexing Libraries},
518
+ − 429
title = {Regex Posix},
471
+ − 430
url = {https://wiki.haskell.org/Regex_Posix},
518
+ − 431
year = {2017},
+ − 432
bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}}
+ − 433
472
+ − 434
@misc{regex101,
+ − 435
author = {Firas Dib},
518
+ − 436
keywords = {regex tester debugger},
+ − 437
title = {regex101},
472
+ − 438
url = {https://regex101.com/},
518
+ − 439
year = {2011},
+ − 440
bdsk-url-1 = {https://regex101.com/}}
+ − 441
500
+ − 442
@misc{atomEditor,
518
+ − 443
author = {Dunno},
+ − 444
keywords = {text editor},
500
+ − 445
title = {Atom Editor},
518
+ − 446
url = {https://atom.io},
500
+ − 447
year = {2022},
518
+ − 448
bdsk-url-1 = {https://atom.io}}
+ − 449
471
+ − 450
@techreport{grathwohl2014crash,
518
+ − 451
author = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp},
+ − 452
institution = {Technical report, University of Copenhagen},
+ − 453
title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types},
+ − 454
year = {2014}}
471
+ − 455
602
+ − 456
@inproceedings{xml2015,
+ − 457
author = {Bj\"{o}rklund, Henrik and Martens, Wim and Timm, Thomas},
+ − 458
title = {Efficient Incremental Evaluation of Succinct Regular Expressions},
+ − 459
year = {2015},
+ − 460
isbn = {9781450337946},
+ − 461
publisher = {Association for Computing Machinery},
+ − 462
address = {New York, NY, USA},
+ − 463
url = {https://doi.org/10.1145/2806416.2806434},
+ − 464
doi = {10.1145/2806416.2806434},
+ − 465
abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.},
+ − 466
booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management},
+ − 467
pages = {1541–1550},
+ − 468
numpages = {10},
+ − 469
keywords = {regular expressions, schema, regular path queries, xml},
+ − 470
location = {Melbourne, Australia},
+ − 471
series = {CIKM '15}
+ − 472
}
+ − 473
+ − 474
468
+ − 475
@misc{SE16,
+ − 476
author = {StackStatus},
+ − 477
date-added = {2019-06-26 11:28:41 +0000},
+ − 478
date-modified = {2019-06-26 16:07:31 +0000},
+ − 479
keywords = {ReDos Attack},
+ − 480
month = {July},
+ − 481
rating = {5},
+ − 482
read = {1},
+ − 483
title = {Stack Overflow Outage Postmortem},
+ − 484
url = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016},
+ − 485
year = {2016},
+ − 486
bdsk-url-1 = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}}
+ − 487
+ − 488
@article{HosoyaVouillonPierce2005,
+ − 489
author = {H.~Hosoya and J.~Vouillon and B.~C.~Pierce},
+ − 490
journal = {ACM Transactions on Programming Languages and Systems (TOPLAS)},
+ − 491
number = 1,
+ − 492
pages = {46--90},
+ − 493
title = {{R}egular {E}xpression {T}ypes for {XML}},
+ − 494
volume = 27,
+ − 495
year = {2005}}
+ − 496
+ − 497
@misc{POSIX,
+ − 498
note = {\url{http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html}},
+ − 499
title = {{T}he {O}pen {G}roup {B}ase {S}pecification {I}ssue 6 {IEEE} {S}td 1003.1 2004 {E}dition},
+ − 500
year = {2004}}
+ − 501
+ − 502
@inproceedings{AusafDyckhoffUrban2016,
+ − 503
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban},
+ − 504
booktitle = {Proc.~of the 7th International Conference on Interactive Theorem Proving (ITP)},
+ − 505
pages = {69--86},
+ − 506
series = {LNCS},
+ − 507
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions ({P}roof {P}earl)},
+ − 508
volume = {9807},
+ − 509
year = {2016}}
+ − 510
+ − 511
@article{aduAFP16,
+ − 512
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban},
+ − 513
issn = {2150-914x},
+ − 514
journal = {Archive of Formal Proofs},
+ − 515
note = {\url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}, Formal proof development},
+ − 516
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions},
+ − 517
year = 2016}
+ − 518
+ − 519
@techreport{CrashCourse2014,
+ − 520
annote = {draft report},
+ − 521
author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen},
+ − 522
institution = {University of Copenhagen},
+ − 523
title = {{A} {C}rash-{C}ourse in {R}egular {E}xpression {P}arsing and {R}egular {E}xpressions as {T}ypes},
+ − 524
year = {2014}}
+ − 525
+ − 526
@inproceedings{Sulzmann2014,
+ − 527
author = {M.~Sulzmann and K.~Lu},
+ − 528
booktitle = {Proc.~of the 12th International Conference on Functional and Logic Programming (FLOPS)},
+ − 529
pages = {203--220},
+ − 530
series = {LNCS},
+ − 531
title = {{POSIX} {R}egular {E}xpression {P}arsing with {D}erivatives},
+ − 532
volume = {8475},
+ − 533
year = {2014}}
+ − 534
+ − 535
@inproceedings{Sulzmann2014b,
+ − 536
author = {M.~Sulzmann and P.~van Steenhoven},
+ − 537
booktitle = {Proc.~of the 23rd International Conference on Compiler Construction (CC)},
+ − 538
pages = {174--191},
+ − 539
series = {LNCS},
+ − 540
title = {{A} {F}lexible and {E}fficient {ML} {L}exer {T}ool {B}ased on {E}xtended {R}egular {E}xpression {S}ubmatching},
+ − 541
volume = {8409},
+ − 542
year = {2014}}
+ − 543
+ − 544
@book{Pierce2015,
+ − 545
author = {B.~C.~Pierce and C.~Casinghino and M.~Gaboardi and M.~Greenberg and C.~Hri\c{t}cu and V.~Sj\"{o}berg and B.~Yorgey},
+ − 546
note = {\url{http://www.cis.upenn.edu/~bcpierce/sf}},
+ − 547
publisher = {Electronic textbook},
+ − 548
title = {{S}oftware {F}oundations},
+ − 549
year = {2015}}
+ − 550
+ − 551
@misc{Kuklewicz,
+ − 552
author = {C.~Kuklewicz},
+ − 553
howpublished = {\url{https://wiki.haskell.org/Regex_Posix}},
+ − 554
title = {{R}egex {P}osix}}
+ − 555
+ − 556
@article{Vansummeren2006,
+ − 557
author = {S.~Vansummeren},
+ − 558
journal = {ACM Transactions on Programming Languages and Systems},
+ − 559
number = {3},
+ − 560
pages = {389--428},
+ − 561
title = {{T}ype {I}nference for {U}nique {P}attern {M}atching},
+ − 562
volume = {28},
+ − 563
year = {2006}}
+ − 564
+ − 565
@inproceedings{Asperti12,
+ − 566
author = {A.~Asperti},
+ − 567
booktitle = {Proc.~of the 3rd International Conference on Interactive Theorem Proving (ITP)},
+ − 568
pages = {283--298},
+ − 569
series = {LNCS},
+ − 570
title = {{A} {C}ompact {P}roof of {D}ecidability for {R}egular {E}xpression {E}quivalence},
+ − 571
volume = {7406},
+ − 572
year = {2012}}
+ − 573
+ − 574
@inproceedings{Frisch2004,
+ − 575
author = {A.~Frisch and L.~Cardelli},
+ − 576
booktitle = {Proc.~of the 31st International Conference on Automata, Languages and Programming (ICALP)},
+ − 577
pages = {618--629},
+ − 578
series = {LNCS},
+ − 579
title = {{G}reedy {R}egular {E}xpression {M}atching},
+ − 580
volume = {3142},
+ − 581
year = {2004}}
+ − 582
+ − 583
@article{Antimirov95,
+ − 584
author = {V.~Antimirov},
+ − 585
journal = {Theoretical Computer Science},
+ − 586
pages = {291--319},
+ − 587
title = {{P}artial {D}erivatives of {R}egular {E}xpressions and {F}inite {A}utomata {C}onstructions},
+ − 588
volume = {155},
+ − 589
year = {1995}}
+ − 590
+ − 591
@inproceedings{Nipkow98,
+ − 592
author = {T.~Nipkow},
+ − 593
booktitle = {Proc.~of the 11th International Conference on Theorem Proving in Higher Order Logics (TPHOLs)},
+ − 594
pages = {1--15},
+ − 595
series = {LNCS},
+ − 596
title = {{V}erified {L}exical {A}nalysis},
+ − 597
volume = 1479,
+ − 598
year = 1998}
+ − 599
+ − 600
@article{Brzozowski1964,
+ − 601
author = {J.~A.~Brzozowski},
+ − 602
journal = {Journal of the {ACM}},
+ − 603
number = {4},
+ − 604
pages = {481--494},
+ − 605
title = {{D}erivatives of {R}egular {E}xpressions},
+ − 606
volume = {11},
+ − 607
year = {1964}}
+ − 608
+ − 609
@article{Leroy2009,
+ − 610
author = {X.~Leroy},
+ − 611
journal = {Communications of the ACM},
+ − 612
number = 7,
+ − 613
pages = {107--115},
+ − 614
title = {{F}ormal {V}erification of a {R}ealistic {C}ompiler},
+ − 615
volume = 52,
+ − 616
year = 2009}
+ − 617
+ − 618
@inproceedings{Paulson2015,
+ − 619
author = {L.~C.~Paulson},
+ − 620
booktitle = {Proc.~of the 25th International Conference on Automated Deduction (CADE)},
+ − 621
pages = {231--245},
+ − 622
series = {LNAI},
+ − 623
title = {{A} {F}ormalisation of {F}inite {A}utomata {U}sing {H}ereditarily {F}inite {S}ets},
+ − 624
volume = {9195},
+ − 625
year = {2015}}
+ − 626
+ − 627
@article{Wu2014,
+ − 628
author = {C.~Wu and X.~Zhang and C.~Urban},
+ − 629
journal = {Journal of Automatic Reasoning},
+ − 630
number = {4},
+ − 631
pages = {451--480},
+ − 632
title = {{A} {F}ormalisation of the {M}yhill-{N}erode {T}heorem based on {R}egular {E}xpressions},
+ − 633
volume = {52},
+ − 634
year = {2014}}
+ − 635
+ − 636
@inproceedings{Regehr2011,
+ − 637
author = {X.~Yang and Y.~Chen and E.~Eide and J.~Regehr},
+ − 638
booktitle = {Proc.~of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI)},
+ − 639
pages = {283--294},
+ − 640
title = {{F}inding and {U}nderstanding {B}ugs in {C} {C}ompilers},
+ − 641
year = {2011}}
+ − 642
+ − 643
@article{Norrish2014,
+ − 644
author = {A.~Barthwal and M.~Norrish},
+ − 645
journal = {Journal of Computer and System Sciences},
+ − 646
number = {2},
+ − 647
pages = {346--362},
+ − 648
title = {{A} {M}echanisation of {S}ome {C}ontext-{F}ree {L}anguage {T}heory in {HOL4}},
+ − 649
volume = {80},
+ − 650
year = {2014}}
+ − 651
+ − 652
@article{Thompson1968,
+ − 653
author = {K.~Thompson},
+ − 654
issue_date = {June 1968},
+ − 655
journal = {Communications of the ACM},
+ − 656
number = {6},
+ − 657
pages = {419--422},
+ − 658
title = {{P}rogramming {T}echniques: {R}egular {E}xpression {S}earch {A}lgorithm},
+ − 659
volume = {11},
+ − 660
year = {1968}}
+ − 661
+ − 662
@article{Owens2009,
+ − 663
author = {S.~Owens and J.~H.~Reppy and A.~Turon},
+ − 664
journal = {Journal of Functinal Programming},
+ − 665
number = {2},
+ − 666
pages = {173--190},
+ − 667
title = {{R}egular-{E}xpression {D}erivatives {R}e-{E}xamined},
+ − 668
volume = {19},
+ − 669
year = {2009}}
+ − 670
+ − 671
@inproceedings{Sulzmann2015,
+ − 672
author = {M.~Sulzmann and P.~Thiemann},
+ − 673
booktitle = {Proc.~of the 9th International Conference on Language and Automata Theory and Applications (LATA)},
+ − 674
pages = {275--286},
+ − 675
series = {LNCS},
+ − 676
title = {{D}erivatives for {R}egular {S}huffle {E}xpressions},
+ − 677
volume = {8977},
+ − 678
year = {2015}}
+ − 679
+ − 680
@inproceedings{Chen2012,
+ − 681
author = {H.~Chen and S.~Yu},
+ − 682
booktitle = {Proc.~in the International Workshop on Theoretical Computer Science (WTCS)},
+ − 683
pages = {343--356},
+ − 684
series = {LNCS},
+ − 685
title = {{D}erivatives of {R}egular {E}xpressions and an {A}pplication},
+ − 686
volume = {7160},
+ − 687
year = {2012}}
+ − 688
+ − 689
@article{Krauss2011,
+ − 690
author = {A.~Krauss and T.~Nipkow},
+ − 691
journal = {Journal of Automated Reasoning},
+ − 692
pages = {95--106},
+ − 693
title = {{P}roof {P}earl: {R}egular {E}xpression {E}quivalence and {R}elation {A}lgebra},
+ − 694
volume = 49,
+ − 695
year = 2012}
+ − 696
+ − 697
@inproceedings{Traytel2015,
+ − 698
author = {D.~Traytel},
+ − 699
booktitle = {Proc.~of the 24th Annual Conference on Computer Science Logic (CSL)},
+ − 700
pages = {487--503},
+ − 701
series = {LIPIcs},
+ − 702
title = {{A} {C}oalgebraic {D}ecision {P}rocedure for {WS1S}},
+ − 703
volume = {41},
+ − 704
year = {2015}}
+ − 705
+ − 706
@inproceedings{Traytel2013,
+ − 707
author = {D.~Traytel and T.~Nipkow},
+ − 708
booktitle = {Proc.~of the 18th ACM SIGPLAN International Conference on Functional Programming (ICFP)},
+ − 709
pages = {3-12},
+ − 710
title = {{A} {V}erified {D}ecision {P}rocedure for {MSO} on {W}ords {B}ased on {D}erivatives of {R}egular {E}xpressions},
+ − 711
year = 2013}
+ − 712
+ − 713
@inproceedings{Coquand2012,
+ − 714
author = {T.~Coquand and V.~Siles},
+ − 715
booktitle = {Proc.~of the 1st International Conference on Certified Programs and Proofs (CPP)},
+ − 716
pages = {119--134},
+ − 717
series = {LNCS},
+ − 718
title = {{A} {D}ecision {P}rocedure for {R}egular {E}xpression {E}quivalence in {T}ype {T}heory},
+ − 719
volume = {7086},
+ − 720
year = {2011}}
+ − 721
609
+ − 722
+ − 723
468
+ − 724
@inproceedings{Almeidaetal10,
+ − 725
author = {J.~B.~Almeida and N.~Moriera and D.~Pereira and S.~M.~de Sousa},
+ − 726
booktitle = {Proc.~of the 15th International Conference on Implementation and Application of Automata (CIAA)},
+ − 727
pages = {59-68},
+ − 728
series = {LNCS},
+ − 729
title = {{P}artial {D}erivative {A}utomata {F}ormalized in {C}oq},
+ − 730
volume = {6482},
+ − 731
year = {2010}}
+ − 732
+ − 733
@article{Owens2008,
+ − 734
author = {S.~Owens and K.~Slind},
+ − 735
journal = {Higher-Order and Symbolic Computation},
+ − 736
number = {4},
+ − 737
pages = {377--409},
+ − 738
title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic},
+ − 739
volume = {21},
+ − 740
year = {2008}}
+ − 741
+ − 742
@article{Owens2,
+ − 743
author = {S.~Owens and K.~Slind},
+ − 744
bibsource = {dblp computer science bibliography, http://dblp.org},
+ − 745
biburl = {http://dblp.uni-trier.de/rec/bib/journals/lisp/OwensS08},
+ − 746
doi = {10.1007/s10990-008-9038-0},
+ − 747
journal = {Higher-Order and Symbolic Computation},
+ − 748
number = {4},
+ − 749
pages = {377--409},
+ − 750
timestamp = {Wed, 16 Dec 2009 13:51:02 +0100},
+ − 751
title = {Adapting functional programs to higher order logic},
+ − 752
url = {http://dx.doi.org/10.1007/s10990-008-9038-0},
+ − 753
volume = {21},
+ − 754
year = {2008},
+ − 755
bdsk-url-1 = {http://dx.doi.org/10.1007/s10990-008-9038-0}}
+ − 756
+ − 757
@misc{PCRE,
+ − 758
title = {{PCRE - Perl Compatible Regular Expressions}},
+ − 759
url = {http://www.pcre.org},
+ − 760
bdsk-url-1 = {http://www.pcre.org}}
+ − 761
609
+ − 762
%@inproceedings{OkuiSuzuki2010,
+ − 763
% author = {S.~Okui and T.~Suzuki},
+ − 764
% booktitle = {Proc.~of the 15th International Conference on Implementation and Application of Automata (CIAA)},
+ − 765
% pages = {231--240},
+ − 766
% series = {LNCS},
+ − 767
% title = {{D}isambiguation in {R}egular {E}xpression {M}atching via {P}osition {A}utomata with {A}ugmented {T}ransitions},
+ − 768
% volume = {6482},
+ − 769
% year = {2010}}
+ − 770
%
468
+ − 771
609
+ − 772
+ − 773
%@techreport{OkuiSuzukiTech,
+ − 774
% author = {S.~Okui and T.~Suzuki},
+ − 775
% institution = {University of Aizu},
+ − 776
% title = {{D}isambiguation in {R}egular {E}xpression {M}atching via {P}osition {A}utomata with {A}ugmented {T}ransitions},
+ − 777
% year = {2013}}
468
+ − 778
+ − 779
@inproceedings{Davis18,
+ − 780
author = {J.~C.~Davis and C.~.A.~Coghlan and F.~Servant and D.~Lee},
+ − 781
booktitle = {Proc.~of the 26th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)},
+ − 782
numpages = {11},
+ − 783
pages = {246--256},
+ − 784
title = {{T}he {I}mpact of {R}egular {E}xpression {D}enial of {S}ervice ({ReDoS}) in {P}ractice: {A}n {E}mpirical {S}tudy at the {E}cosystem {S}cale},
+ − 785
year = {2018}}