468
+ − 1
%% This BibTeX bibliography file was created using BibDesk.
+ − 2
%% https://bibdesk.sourceforge.io/
+ − 3
518
+ − 4
%% Created for CS TAN at 2022-05-23 18:43:50 +0100
468
+ − 5
+ − 6
+ − 7
%% Saved with string encoding Unicode (UTF-8)
607
+ − 8
608
+ − 9
612
+ − 10
%@phdthesis{Fahad18,
+ − 11
% author = "Rempel, Robert Charles",
+ − 12
% title = "Relaxation Effects for Coupled Nuclear Spins",
+ − 13
% school = "Stanford University",
+ − 14
% address = "Stanford, CA",
+ − 15
% year = 1956,
+ − 16
% month = jun
+ − 17
%}
608
+ − 18
609
+ − 19
%% POSIX specification------------------------
+ − 20
@InProceedings{Okui10,
+ − 21
author="Okui, Satoshi
+ − 22
and Suzuki, Taro",
+ − 23
editor="Domaratzki, Michael
+ − 24
and Salomaa, Kai",
+ − 25
title="Disambiguation in Regular Expression Matching via Position Automata with Augmented Transitions",
+ − 26
booktitle="Implementation and Application of Automata",
+ − 27
year="2011",
+ − 28
publisher="Springer Berlin Heidelberg",
+ − 29
address="Berlin, Heidelberg",
+ − 30
pages="231--240",
+ − 31
abstract="This paper offers a new efficient regular expression matching algorithm which follows the POSIX-type leftmost-longest rule. The algorithm basically emulates the subset construction without backtracking, so that its computational cost even in the worst case does not explode exponentially; the time complexity of the algorithm is O(mn(n{\thinspace}+{\thinspace}c)), where m is the length of a given input string, n the number of occurrences of the most frequently used letter in a given regular expression and c the number of subexpressions to be used for capturing substrings. A formalization of the leftmost-longest semantics by using parse trees is also discussed.",
+ − 32
isbn="978-3-642-18098-9"
+ − 33
}
+ − 34
+ − 35
%% POSIX specification------------------------
+ − 36
+ − 37
%% Brzozowski ders------------------------
+ − 38
@article{Berglund14,
+ − 39
author = {Berglund, Martin and Drewes, Frank and Van Der Merwe, Brink},
+ − 40
year = {2014},
+ − 41
month = {05},
+ − 42
pages = {},
+ − 43
title = {Analyzing Catastrophic Backtracking Behavior in Practical Regular Expression Matching},
+ − 44
volume = {151},
+ − 45
journal = {Electronic Proceedings in Theoretical Computer Science},
+ − 46
doi = {10.4204/EPTCS.151.7}
+ − 47
}
+ − 48
+ − 49
@InProceedings{Berglund18,
+ − 50
author="Berglund, Martin
+ − 51
and Bester, Willem
+ − 52
and van der Merwe, Brink",
+ − 53
editor="Fischer, Bernd
+ − 54
and Uustalu, Tarmo",
+ − 55
title="Formalising Boost POSIX Regular Expression Matching",
+ − 56
booktitle="Theoretical Aspects of Computing -- ICTAC 2018",
+ − 57
year="2018",
+ − 58
publisher="Springer International Publishing",
+ − 59
address="Cham",
+ − 60
pages="99--115",
+ − 61
abstract="Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.",
+ − 62
isbn="978-3-030-02508-3"
+ − 63
}
+ − 64
+ − 65
+ − 66
@inproceedings{Chen12,
+ − 67
author = {Chen, Haiming and Yu, Sheng},
+ − 68
year = {2012},
+ − 69
month = {01},
+ − 70
pages = {343-356},
+ − 71
title = {Derivatives of Regular Expressions and an Application},
+ − 72
volume = {7160},
+ − 73
doi = {10.1007/978-3-642-27654-5_27}
+ − 74
}
+ − 75
+ − 76
+ − 77
+ − 78
%% Brzozowski ders------------------------
618
+ − 79
@article{Murugesan2014,
+ − 80
author = {N.~Murugesan and O.~V.~Shanmuga Sundaram},
+ − 81
title = {{S}ome {P}roperties of {B}rzozowski {D}erivatives of {R}egular {E}xpressions},
+ − 82
journal = {International Journal of Computer Trends and Technology},
+ − 83
volume = {13},
+ − 84
number = {1},
+ − 85
year = {2014},
+ − 86
url = {http://arxiv.org/abs/1407.5902},
+ − 87
pages = {29--33}
+ − 88
}
609
+ − 89
+ − 90
%% look-aheads------------------------
+ − 91
@article{Takayuki2019,
+ − 92
title={Derivatives of Regular Expressions with Lookahead},
+ − 93
author={Takayuki Miyazaki and Yasuhiko Minamide},
+ − 94
journal={Journal of Information Processing},
+ − 95
volume={27},
+ − 96
number={ },
+ − 97
pages={422-430},
+ − 98
year={2019},
+ − 99
doi={10.2197/ipsjjip.27.422}
+ − 100
}
+ − 101
+ − 102
%% look-aheads------------------------
+ − 103
+ − 104
608
+ − 105
+ − 106
%% back-references--------------------
+ − 107
@incollection{AHO1990255,
+ − 108
title = {CHAPTER 5 - Algorithms for Finding Patterns in Strings},
+ − 109
editor = {JAN {VAN LEEUWEN}},
+ − 110
booktitle = {Algorithms and Complexity},
+ − 111
publisher = {Elsevier},
+ − 112
address = {Amsterdam},
+ − 113
pages = {255-300},
+ − 114
year = {1990},
+ − 115
series = {Handbook of Theoretical Computer Science},
+ − 116
isbn = {978-0-444-88071-0},
+ − 117
doi = {https://doi.org/10.1016/B978-0-444-88071-0.50010-2},
+ − 118
url = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102},
+ − 119
author = {Alfred V. AHO},
+ − 120
abstract = {Publisher Summary
+ − 121
This chapter discusses the algorithms for solving string-matching problems that have proven useful for text-editing and text-processing applications. String pattern matching is an important problem that occurs in many areas of science and information processing. In computing, it occurs naturally as part of data processing, text editing, term rewriting, lexical analysis, and information retrieval. Many text editors and programming languages have facilities for matching strings. In biology, string-matching problems arise in the analysis of nucleic acids and protein sequences, and in the investigation of molecular phylogeny. String matching is also one of the central and most widely studied problems in theoretical computer science. The simplest form of the problem is to locate an occurrence of a keyword as a substring in a sequence of characters, which is called the input string. For example, the input string queueing contains the keyword ueuei as a substring. Even for this problem, several innovative, theoretically interesting algorithms have been devised that run significantly faster than the obvious brute-force method.}
+ − 122
}
+ − 123
+ − 124
%% back-references--------------------
+ − 125
607
+ − 126
@article{fowler2003,
+ − 127
title={An interpretation of the POSIX regex standard},
+ − 128
author={Fowler, Glenn},
+ − 129
journal={URL: https://web. archive. org/web/20050408073627/http://www. research. att. com/\~{} gsf/testregex/re-interpretation. html},
+ − 130
year={2003}
+ − 131
}
+ − 132
+ − 133
+ − 134
@inproceedings{Snort1999,
+ − 135
author = {Roesch, Martin},
+ − 136
title = {Snort - Lightweight Intrusion Detection for Networks},
+ − 137
year = {1999},
+ − 138
publisher = {USENIX Association},
+ − 139
address = {USA},
+ − 140
abstract = {Network intrusion detection systems (NIDS) are an important part of any network security architecture. They provide a layer of defense which monitors network traffic for predefined suspicious activity or patterns, and alert system administrators when potential hostile traffic is detected. Commercial NIDS have many differences, but Information Systems departments must face the commonalities that they share such as significant system footprint, complex deployment and high monetary cost. Snort was designed to address these issues.},
+ − 141
booktitle = {Proceedings of the 13th USENIX Conference on System Administration},
+ − 142
pages = {229–238},
+ − 143
numpages = {10},
+ − 144
location = {Seattle, Washington},
+ − 145
series = {LISA '99}
+ − 146
}
+ − 147
606
+ − 148
@inproceedings{Becchi08,
+ − 149
author = {Becchi, Michela and Crowley, Patrick},
+ − 150
year = {2008},
+ − 151
month = {01},
+ − 152
pages = {25},
+ − 153
title = {Extending finite automata to efficiently match Perl-compatible regular expressions},
+ − 154
doi = {10.1145/1544012.1544037}
+ − 155
}
603
+ − 156
605
+ − 157
@book{
+ − 158
Sakarovitch2009,
+ − 159
place={Cambridge},
+ − 160
title={Elements of Automata Theory},
+ − 161
DOI={10.1017/CBO9781139195218},
+ − 162
publisher={Cambridge University Press},
+ − 163
author={Sakarovitch, Jacques},
+ − 164
editor={Thomas, ReubenTranslator},
+ − 165
year={2009}
+ − 166
}
+ − 167
+ − 168
607
+ − 169
@unpublished{CSL2022,
603
+ − 170
author = "Chengsong Tan and Christian Urban",
+ − 171
title = "POSIX Lexing with Bitcoded Derivatives",
+ − 172
note = "submitted",
+ − 173
}
+ − 174
601
+ − 175
@INPROCEEDINGS{Verbatim, author={Egolf, Derek and Lasser, Sam and Fisher, Kathleen}, booktitle={2021 IEEE Security and Privacy Workshops (SPW)}, title={Verbatim: A Verified Lexer Generator}, year={2021}, volume={}, number={}, pages={92-100}, doi={10.1109/SPW53761.2021.00022}}
468
+ − 176
518
+ − 177
601
+ − 178
@inproceedings{Verbatimpp,
+ − 179
author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
+ − 180
title = {Verbatim++: Verified, Optimized, and Semantically Rich Lexing with Derivatives},
+ − 181
year = {2022},
+ − 182
isbn = {9781450391825},
+ − 183
publisher = {Association for Computing Machinery},
+ − 184
address = {New York, NY, USA},
+ − 185
url = {https://doi.org/10.1145/3497775.3503694},
+ − 186
doi = {10.1145/3497775.3503694},
+ − 187
abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
+ − 188
booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
+ − 189
pages = {27–39},
+ − 190
numpages = {13},
+ − 191
keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
+ − 192
location = {Philadelphia, PA, USA},
+ − 193
series = {CPP 2022}
+ − 194
}
+ − 195
518
+ − 196
+ − 197
@article{Turo_ov__2020,
+ − 198
author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar},
+ − 199
date-added = {2022-05-23 18:43:04 +0100},
+ − 200
date-modified = {2022-05-23 18:43:04 +0100},
+ − 201
doi = {10.1145/3428286},
+ − 202
journal = {Proceedings of the {ACM} on Programming Languages},
+ − 203
month = {nov},
+ − 204
number = {{OOPSLA}},
+ − 205
pages = {1--30},
+ − 206
publisher = {Association for Computing Machinery ({ACM})},
+ − 207
title = {Regex matching with counting-set automata},
+ − 208
url = {https://doi.org/10.1145%2F3428286},
+ − 209
volume = {4},
+ − 210
year = 2020,
+ − 211
bdsk-url-1 = {https://doi.org/10.1145%2F3428286},
+ − 212
bdsk-url-2 = {https://doi.org/10.1145/3428286}}
+ − 213
468
+ − 214
@article{Rathnayake2014StaticAF,
518
+ − 215
author = {Asiri Rathnayake and Hayo Thielecke},
+ − 216
journal = {ArXiv},
+ − 217
title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
+ − 218
volume = {abs/1405.7058},
+ − 219
year = {2014}}
468
+ − 220
472
+ − 221
@inproceedings{Weideman2017Static,
518
+ − 222
author = {Nicolaas Weideman},
+ − 223
title = {Static analysis of regular expressions},
+ − 224
year = {2017}}
472
+ − 225
468
+ − 226
@inproceedings{RibeiroAgda2017,
+ − 227
abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.},
+ − 228
address = {New York, NY, USA},
+ − 229
articleno = {4},
+ − 230
author = {Ribeiro, Rodrigo and Bois, Andr\'{e} Du},
+ − 231
booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages},
+ − 232
date-modified = {2022-03-16 16:38:47 +0000},
+ − 233
doi = {10.1145/3125374.3125381},
+ − 234
isbn = {9781450353892},
+ − 235
keywords = {Certified algorithms, regular expressions, dependent types, bit-codes},
+ − 236
location = {Fortaleza, CE, Brazil},
+ − 237
numpages = {8},
+ − 238
publisher = {Association for Computing Machinery},
+ − 239
series = {SBLP 2017},
+ − 240
title = {Certified Bit-Coded Regular Expression Parsing},
+ − 241
url = {https://doi.org/10.1145/3125374.3125381},
+ − 242
year = {2017},
+ − 243
bdsk-url-1 = {https://doi.org/10.1145/3125374.3125381}}
+ − 244
+ − 245
@article{Thompson_1968,
+ − 246
author = {Ken Thompson},
+ − 247
date-added = {2022-02-23 13:44:42 +0000},
+ − 248
date-modified = {2022-02-23 13:44:42 +0000},
+ − 249
doi = {10.1145/363347.363387},
+ − 250
journal = {Communications of the {ACM}},
+ − 251
month = {jun},
+ − 252
number = {6},
+ − 253
pages = {419--422},
+ − 254
publisher = {Association for Computing Machinery ({ACM})},
+ − 255
title = {Programming Techniques: Regular expression search algorithm},
+ − 256
url = {https://doi.org/10.1145%2F363347.363387},
+ − 257
volume = {11},
+ − 258
year = 1968,
+ − 259
bdsk-url-1 = {https://doi.org/10.1145%2F363347.363387},
+ − 260
bdsk-url-2 = {https://doi.org/10.1145/363347.363387}}
+ − 261
+ − 262
@article{17Bir,
+ − 263
author = {Asiri Rathnayake and Hayo Thielecke},
+ − 264
date-added = {2019-08-18 17:57:30 +0000},
+ − 265
date-modified = {2019-08-18 18:00:13 +0000},
+ − 266
journal = {arXiv:1405.7058},
+ − 267
title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
+ − 268
year = {2017}}
518
+ − 269
471
+ − 270
@article{campeanu2003formal,
518
+ − 271
author = {C{\^a}mpeanu, Cezar and Salomaa, Kai and Yu, Sheng},
+ − 272
journal = {International Journal of Foundations of Computer Science},
+ − 273
number = {06},
+ − 274
pages = {1007--1018},
+ − 275
publisher = {World Scientific},
+ − 276
title = {A formal study of practical regular expressions},
+ − 277
volume = {14},
+ − 278
year = {2003}}
471
+ − 279
+ − 280
@article{alfred2014algorithms,
518
+ − 281
author = {Alfred, V},
+ − 282
journal = {Algorithms and Complexity},
+ − 283
pages = {255},
+ − 284
publisher = {Elsevier},
+ − 285
title = {Algorithms for finding patterns in strings},
+ − 286
volume = {1},
+ − 287
year = {2014}}
471
+ − 288
+ − 289
@article{CAMPEANU2009Intersect,
518
+ − 290
abstract = {In this paper we revisit the semantics of extended regular expressions (regex), defined succinctly in the 90s [A.V. Aho, Algorithms for finding patterns in strings, in: Jan van Leeuwen (Ed.), Handbook of Theoretical Computer Science, in: Algorithms and Complexity, vol. A, Elsevier and MIT Press, 1990, pp. 255--300] and rigorously in 2003 by C{\^a}mpeanu, Salomaa and Yu [C. C{\^a}mpeanu, K. Salomaa, S. Yu, A formal study of practical regular expressions, IJFCS 14 (6) (2003) 1007--1018], when the authors reported an open problem, namely whether regex languages are closed under the intersection with regular languages. We give a positive answer; and for doing so, we propose a new class of machines --- regex automata systems (RAS) --- which are equivalent to regex. Among others, these machines provide a consistent and convenient method of implementing regex in practice. We also prove, as a consequence of this closure property, that several languages, such as the mirror language, the language of palindromes, and the language of balanced words are not regex languages.},
+ − 291
author = {Cezar C{\^a}mpeanu and Nicolae Santean},
+ − 292
doi = {https://doi.org/10.1016/j.tcs.2009.02.022},
+ − 293
issn = {0304-3975},
+ − 294
journal = {Theoretical Computer Science},
+ − 295
keywords = {Extended regular expression, Regex automata system, Regex},
+ − 296
note = {Formal Languages and Applications: A Collection of Papers in Honor of Sheng Yu},
+ − 297
number = {24},
+ − 298
pages = {2336-2344},
+ − 299
title = {On the intersection of regex languages with regular languages},
+ − 300
url = {https://www.sciencedirect.com/science/article/pii/S0304397509001789},
+ − 301
volume = {410},
+ − 302
year = {2009},
+ − 303
bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397509001789},
+ − 304
bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2009.02.022}}
468
+ − 305
+ − 306
@article{nielson11bcre,
+ − 307
author = {Lasse Nielsen, Fritz Henglein},
+ − 308
date-added = {2019-07-03 21:09:39 +0000},
+ − 309
date-modified = {2019-07-03 21:17:33 +0000},
+ − 310
journal = {LATA},
+ − 311
title = {Bit-coded Regular Expression Parsing},
+ − 312
year = {2011},
+ − 313
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
518
+ − 314
468
+ − 315
@software{regexploit2021,
+ − 316
author = {Ben Caller, Luca Carettoni},
+ − 317
date-added = {2020-11-24 00:00:00 +0000},
+ − 318
date-modified = {2021-05-07 00:00:00 +0000},
+ − 319
keywords = {ReDos static analyser},
472
+ − 320
month = {May},
+ − 321
title = {regexploit},
+ − 322
url = {https://github.com/doyensec/regexploit},
518
+ − 323
year = {2021},
+ − 324
bdsk-url-1 = {https://github.com/doyensec/regexploit}}
+ − 325
608
+ − 326
@software{pcre,
+ − 327
author = {Philip Hazel},
+ − 328
date-added = {1997-01-01 00:00:00 +0000},
+ − 329
date-modified = {2021-06-14 00:00:00 +0000},
+ − 330
keywords = {Perl Compatible Regular Expressions},
+ − 331
month = {June},
+ − 332
title = {PCRE},
+ − 333
url = {https://www.pcre.org/original/doc/html/},
+ − 334
year = {2021},
+ − 335
bdsk-url-1 = {https://www.pcre.org/original/doc/html/}
+ − 336
}
+ − 337
471
+ − 338
@misc{KuklewiczHaskell,
+ − 339
author = {Kuklewicz},
+ − 340
keywords = {Buggy C POSIX Lexing Libraries},
518
+ − 341
title = {Regex Posix},
471
+ − 342
url = {https://wiki.haskell.org/Regex_Posix},
518
+ − 343
year = {2017},
+ − 344
bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}}
+ − 345
472
+ − 346
@misc{regex101,
+ − 347
author = {Firas Dib},
518
+ − 348
keywords = {regex tester debugger},
+ − 349
title = {regex101},
472
+ − 350
url = {https://regex101.com/},
518
+ − 351
year = {2011},
+ − 352
bdsk-url-1 = {https://regex101.com/}}
+ − 353
500
+ − 354
@misc{atomEditor,
518
+ − 355
author = {Dunno},
+ − 356
keywords = {text editor},
500
+ − 357
title = {Atom Editor},
518
+ − 358
url = {https://atom.io},
500
+ − 359
year = {2022},
518
+ − 360
bdsk-url-1 = {https://atom.io}}
+ − 361
471
+ − 362
@techreport{grathwohl2014crash,
518
+ − 363
author = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp},
+ − 364
institution = {Technical report, University of Copenhagen},
+ − 365
title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types},
+ − 366
year = {2014}}
471
+ − 367
602
+ − 368
@inproceedings{xml2015,
+ − 369
author = {Bj\"{o}rklund, Henrik and Martens, Wim and Timm, Thomas},
+ − 370
title = {Efficient Incremental Evaluation of Succinct Regular Expressions},
+ − 371
year = {2015},
+ − 372
isbn = {9781450337946},
+ − 373
publisher = {Association for Computing Machinery},
+ − 374
address = {New York, NY, USA},
+ − 375
url = {https://doi.org/10.1145/2806416.2806434},
+ − 376
doi = {10.1145/2806416.2806434},
+ − 377
abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.},
+ − 378
booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management},
+ − 379
pages = {1541–1550},
+ − 380
numpages = {10},
+ − 381
keywords = {regular expressions, schema, regular path queries, xml},
+ − 382
location = {Melbourne, Australia},
+ − 383
series = {CIKM '15}
+ − 384
}
+ − 385
+ − 386
468
+ − 387
@misc{SE16,
+ − 388
author = {StackStatus},
+ − 389
date-added = {2019-06-26 11:28:41 +0000},
+ − 390
date-modified = {2019-06-26 16:07:31 +0000},
+ − 391
keywords = {ReDos Attack},
+ − 392
month = {July},
+ − 393
rating = {5},
+ − 394
read = {1},
+ − 395
title = {Stack Overflow Outage Postmortem},
+ − 396
url = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016},
+ − 397
year = {2016},
+ − 398
bdsk-url-1 = {https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}}
+ − 399
+ − 400
@article{HosoyaVouillonPierce2005,
+ − 401
author = {H.~Hosoya and J.~Vouillon and B.~C.~Pierce},
+ − 402
journal = {ACM Transactions on Programming Languages and Systems (TOPLAS)},
+ − 403
number = 1,
+ − 404
pages = {46--90},
+ − 405
title = {{R}egular {E}xpression {T}ypes for {XML}},
+ − 406
volume = 27,
+ − 407
year = {2005}}
+ − 408
+ − 409
@misc{POSIX,
+ − 410
note = {\url{http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html}},
+ − 411
title = {{T}he {O}pen {G}roup {B}ase {S}pecification {I}ssue 6 {IEEE} {S}td 1003.1 2004 {E}dition},
+ − 412
year = {2004}}
+ − 413
+ − 414
@inproceedings{AusafDyckhoffUrban2016,
+ − 415
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban},
+ − 416
booktitle = {Proc.~of the 7th International Conference on Interactive Theorem Proving (ITP)},
+ − 417
pages = {69--86},
+ − 418
series = {LNCS},
+ − 419
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions ({P}roof {P}earl)},
+ − 420
volume = {9807},
+ − 421
year = {2016}}
+ − 422
+ − 423
@article{aduAFP16,
+ − 424
author = {F.~Ausaf and R.~Dyckhoff and C.~Urban},
+ − 425
issn = {2150-914x},
+ − 426
journal = {Archive of Formal Proofs},
+ − 427
note = {\url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}, Formal proof development},
+ − 428
title = {{POSIX} {L}exing with {D}erivatives of {R}egular {E}xpressions},
+ − 429
year = 2016}
+ − 430
+ − 431
@techreport{CrashCourse2014,
+ − 432
annote = {draft report},
+ − 433
author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen},
+ − 434
institution = {University of Copenhagen},
+ − 435
title = {{A} {C}rash-{C}ourse in {R}egular {E}xpression {P}arsing and {R}egular {E}xpressions as {T}ypes},
+ − 436
year = {2014}}
+ − 437
+ − 438
@inproceedings{Sulzmann2014,
+ − 439
author = {M.~Sulzmann and K.~Lu},
+ − 440
booktitle = {Proc.~of the 12th International Conference on Functional and Logic Programming (FLOPS)},
+ − 441
pages = {203--220},
+ − 442
series = {LNCS},
+ − 443
title = {{POSIX} {R}egular {E}xpression {P}arsing with {D}erivatives},
+ − 444
volume = {8475},
+ − 445
year = {2014}}
+ − 446
+ − 447
@inproceedings{Sulzmann2014b,
+ − 448
author = {M.~Sulzmann and P.~van Steenhoven},
+ − 449
booktitle = {Proc.~of the 23rd International Conference on Compiler Construction (CC)},
+ − 450
pages = {174--191},
+ − 451
series = {LNCS},
+ − 452
title = {{A} {F}lexible and {E}fficient {ML} {L}exer {T}ool {B}ased on {E}xtended {R}egular {E}xpression {S}ubmatching},
+ − 453
volume = {8409},
+ − 454
year = {2014}}
+ − 455
+ − 456
@book{Pierce2015,
+ − 457
author = {B.~C.~Pierce and C.~Casinghino and M.~Gaboardi and M.~Greenberg and C.~Hri\c{t}cu and V.~Sj\"{o}berg and B.~Yorgey},
+ − 458
note = {\url{http://www.cis.upenn.edu/~bcpierce/sf}},
+ − 459
publisher = {Electronic textbook},
+ − 460
title = {{S}oftware {F}oundations},
+ − 461
year = {2015}}
+ − 462
+ − 463
@misc{Kuklewicz,
+ − 464
author = {C.~Kuklewicz},
+ − 465
howpublished = {\url{https://wiki.haskell.org/Regex_Posix}},
+ − 466
title = {{R}egex {P}osix}}
+ − 467
+ − 468
@article{Vansummeren2006,
+ − 469
author = {S.~Vansummeren},
+ − 470
journal = {ACM Transactions on Programming Languages and Systems},
+ − 471
number = {3},
+ − 472
pages = {389--428},
+ − 473
title = {{T}ype {I}nference for {U}nique {P}attern {M}atching},
+ − 474
volume = {28},
+ − 475
year = {2006}}
+ − 476
+ − 477
@inproceedings{Asperti12,
+ − 478
author = {A.~Asperti},
+ − 479
booktitle = {Proc.~of the 3rd International Conference on Interactive Theorem Proving (ITP)},
+ − 480
pages = {283--298},
+ − 481
series = {LNCS},
+ − 482
title = {{A} {C}ompact {P}roof of {D}ecidability for {R}egular {E}xpression {E}quivalence},
+ − 483
volume = {7406},
+ − 484
year = {2012}}
+ − 485
+ − 486
@inproceedings{Frisch2004,
+ − 487
author = {A.~Frisch and L.~Cardelli},
+ − 488
booktitle = {Proc.~of the 31st International Conference on Automata, Languages and Programming (ICALP)},
+ − 489
pages = {618--629},
+ − 490
series = {LNCS},
+ − 491
title = {{G}reedy {R}egular {E}xpression {M}atching},
+ − 492
volume = {3142},
+ − 493
year = {2004}}
+ − 494
+ − 495
@article{Antimirov95,
+ − 496
author = {V.~Antimirov},
+ − 497
journal = {Theoretical Computer Science},
+ − 498
pages = {291--319},
+ − 499
title = {{P}artial {D}erivatives of {R}egular {E}xpressions and {F}inite {A}utomata {C}onstructions},
+ − 500
volume = {155},
+ − 501
year = {1995}}
+ − 502
+ − 503
@inproceedings{Nipkow98,
+ − 504
author = {T.~Nipkow},
+ − 505
booktitle = {Proc.~of the 11th International Conference on Theorem Proving in Higher Order Logics (TPHOLs)},
+ − 506
pages = {1--15},
+ − 507
series = {LNCS},
+ − 508
title = {{V}erified {L}exical {A}nalysis},
+ − 509
volume = 1479,
+ − 510
year = 1998}
+ − 511
+ − 512
@article{Brzozowski1964,
+ − 513
author = {J.~A.~Brzozowski},
+ − 514
journal = {Journal of the {ACM}},
+ − 515
number = {4},
+ − 516
pages = {481--494},
+ − 517
title = {{D}erivatives of {R}egular {E}xpressions},
+ − 518
volume = {11},
+ − 519
year = {1964}}
+ − 520
+ − 521
@article{Leroy2009,
+ − 522
author = {X.~Leroy},
+ − 523
journal = {Communications of the ACM},
+ − 524
number = 7,
+ − 525
pages = {107--115},
+ − 526
title = {{F}ormal {V}erification of a {R}ealistic {C}ompiler},
+ − 527
volume = 52,
+ − 528
year = 2009}
+ − 529
+ − 530
@inproceedings{Paulson2015,
+ − 531
author = {L.~C.~Paulson},
+ − 532
booktitle = {Proc.~of the 25th International Conference on Automated Deduction (CADE)},
+ − 533
pages = {231--245},
+ − 534
series = {LNAI},
+ − 535
title = {{A} {F}ormalisation of {F}inite {A}utomata {U}sing {H}ereditarily {F}inite {S}ets},
+ − 536
volume = {9195},
+ − 537
year = {2015}}
+ − 538
+ − 539
@article{Wu2014,
+ − 540
author = {C.~Wu and X.~Zhang and C.~Urban},
+ − 541
journal = {Journal of Automatic Reasoning},
+ − 542
number = {4},
+ − 543
pages = {451--480},
+ − 544
title = {{A} {F}ormalisation of the {M}yhill-{N}erode {T}heorem based on {R}egular {E}xpressions},
+ − 545
volume = {52},
+ − 546
year = {2014}}
+ − 547
+ − 548
@inproceedings{Regehr2011,
+ − 549
author = {X.~Yang and Y.~Chen and E.~Eide and J.~Regehr},
+ − 550
booktitle = {Proc.~of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI)},
+ − 551
pages = {283--294},
+ − 552
title = {{F}inding and {U}nderstanding {B}ugs in {C} {C}ompilers},
+ − 553
year = {2011}}
+ − 554
+ − 555
@article{Norrish2014,
+ − 556
author = {A.~Barthwal and M.~Norrish},
+ − 557
journal = {Journal of Computer and System Sciences},
+ − 558
number = {2},
+ − 559
pages = {346--362},
+ − 560
title = {{A} {M}echanisation of {S}ome {C}ontext-{F}ree {L}anguage {T}heory in {HOL4}},
+ − 561
volume = {80},
+ − 562
year = {2014}}
+ − 563
+ − 564
@article{Thompson1968,
+ − 565
author = {K.~Thompson},
+ − 566
issue_date = {June 1968},
+ − 567
journal = {Communications of the ACM},
+ − 568
number = {6},
+ − 569
pages = {419--422},
+ − 570
title = {{P}rogramming {T}echniques: {R}egular {E}xpression {S}earch {A}lgorithm},
+ − 571
volume = {11},
+ − 572
year = {1968}}
+ − 573
+ − 574
@article{Owens2009,
+ − 575
author = {S.~Owens and J.~H.~Reppy and A.~Turon},
+ − 576
journal = {Journal of Functinal Programming},
+ − 577
number = {2},
+ − 578
pages = {173--190},
+ − 579
title = {{R}egular-{E}xpression {D}erivatives {R}e-{E}xamined},
+ − 580
volume = {19},
+ − 581
year = {2009}}
+ − 582
+ − 583
@inproceedings{Sulzmann2015,
+ − 584
author = {M.~Sulzmann and P.~Thiemann},
+ − 585
booktitle = {Proc.~of the 9th International Conference on Language and Automata Theory and Applications (LATA)},
+ − 586
pages = {275--286},
+ − 587
series = {LNCS},
+ − 588
title = {{D}erivatives for {R}egular {S}huffle {E}xpressions},
+ − 589
volume = {8977},
+ − 590
year = {2015}}
+ − 591
+ − 592
@inproceedings{Chen2012,
+ − 593
author = {H.~Chen and S.~Yu},
+ − 594
booktitle = {Proc.~in the International Workshop on Theoretical Computer Science (WTCS)},
+ − 595
pages = {343--356},
+ − 596
series = {LNCS},
+ − 597
title = {{D}erivatives of {R}egular {E}xpressions and an {A}pplication},
+ − 598
volume = {7160},
+ − 599
year = {2012}}
+ − 600
+ − 601
@article{Krauss2011,
+ − 602
author = {A.~Krauss and T.~Nipkow},
+ − 603
journal = {Journal of Automated Reasoning},
+ − 604
pages = {95--106},
+ − 605
title = {{P}roof {P}earl: {R}egular {E}xpression {E}quivalence and {R}elation {A}lgebra},
+ − 606
volume = 49,
+ − 607
year = 2012}
+ − 608
+ − 609
@inproceedings{Traytel2015,
+ − 610
author = {D.~Traytel},
+ − 611
booktitle = {Proc.~of the 24th Annual Conference on Computer Science Logic (CSL)},
+ − 612
pages = {487--503},
+ − 613
series = {LIPIcs},
+ − 614
title = {{A} {C}oalgebraic {D}ecision {P}rocedure for {WS1S}},
+ − 615
volume = {41},
+ − 616
year = {2015}}
+ − 617
+ − 618
@inproceedings{Traytel2013,
+ − 619
author = {D.~Traytel and T.~Nipkow},
+ − 620
booktitle = {Proc.~of the 18th ACM SIGPLAN International Conference on Functional Programming (ICFP)},
+ − 621
pages = {3-12},
+ − 622
title = {{A} {V}erified {D}ecision {P}rocedure for {MSO} on {W}ords {B}ased on {D}erivatives of {R}egular {E}xpressions},
+ − 623
year = 2013}
+ − 624
+ − 625
@inproceedings{Coquand2012,
+ − 626
author = {T.~Coquand and V.~Siles},
+ − 627
booktitle = {Proc.~of the 1st International Conference on Certified Programs and Proofs (CPP)},
+ − 628
pages = {119--134},
+ − 629
series = {LNCS},
+ − 630
title = {{A} {D}ecision {P}rocedure for {R}egular {E}xpression {E}quivalence in {T}ype {T}heory},
+ − 631
volume = {7086},
+ − 632
year = {2011}}
+ − 633
609
+ − 634
+ − 635
468
+ − 636
@inproceedings{Almeidaetal10,
+ − 637
author = {J.~B.~Almeida and N.~Moriera and D.~Pereira and S.~M.~de Sousa},
+ − 638
booktitle = {Proc.~of the 15th International Conference on Implementation and Application of Automata (CIAA)},
+ − 639
pages = {59-68},
+ − 640
series = {LNCS},
+ − 641
title = {{P}artial {D}erivative {A}utomata {F}ormalized in {C}oq},
+ − 642
volume = {6482},
+ − 643
year = {2010}}
+ − 644
+ − 645
@article{Owens2008,
+ − 646
author = {S.~Owens and K.~Slind},
+ − 647
journal = {Higher-Order and Symbolic Computation},
+ − 648
number = {4},
+ − 649
pages = {377--409},
+ − 650
title = {{A}dapting {F}unctional {P}rograms to {H}igher {O}rder {L}ogic},
+ − 651
volume = {21},
+ − 652
year = {2008}}
+ − 653
+ − 654
@article{Owens2,
+ − 655
author = {S.~Owens and K.~Slind},
+ − 656
bibsource = {dblp computer science bibliography, http://dblp.org},
+ − 657
biburl = {http://dblp.uni-trier.de/rec/bib/journals/lisp/OwensS08},
+ − 658
doi = {10.1007/s10990-008-9038-0},
+ − 659
journal = {Higher-Order and Symbolic Computation},
+ − 660
number = {4},
+ − 661
pages = {377--409},
+ − 662
timestamp = {Wed, 16 Dec 2009 13:51:02 +0100},
+ − 663
title = {Adapting functional programs to higher order logic},
+ − 664
url = {http://dx.doi.org/10.1007/s10990-008-9038-0},
+ − 665
volume = {21},
+ − 666
year = {2008},
+ − 667
bdsk-url-1 = {http://dx.doi.org/10.1007/s10990-008-9038-0}}
+ − 668
+ − 669
@misc{PCRE,
+ − 670
title = {{PCRE - Perl Compatible Regular Expressions}},
+ − 671
url = {http://www.pcre.org},
+ − 672
bdsk-url-1 = {http://www.pcre.org}}
+ − 673
609
+ − 674
%@inproceedings{OkuiSuzuki2010,
+ − 675
% author = {S.~Okui and T.~Suzuki},
+ − 676
% booktitle = {Proc.~of the 15th International Conference on Implementation and Application of Automata (CIAA)},
+ − 677
% pages = {231--240},
+ − 678
% series = {LNCS},
+ − 679
% title = {{D}isambiguation in {R}egular {E}xpression {M}atching via {P}osition {A}utomata with {A}ugmented {T}ransitions},
+ − 680
% volume = {6482},
+ − 681
% year = {2010}}
+ − 682
%
468
+ − 683
609
+ − 684
+ − 685
%@techreport{OkuiSuzukiTech,
+ − 686
% author = {S.~Okui and T.~Suzuki},
+ − 687
% institution = {University of Aizu},
+ − 688
% title = {{D}isambiguation in {R}egular {E}xpression {M}atching via {P}osition {A}utomata with {A}ugmented {T}ransitions},
+ − 689
% year = {2013}}
468
+ − 690
+ − 691
@inproceedings{Davis18,
+ − 692
author = {J.~C.~Davis and C.~.A.~Coghlan and F.~Servant and D.~Lee},
+ − 693
booktitle = {Proc.~of the 26th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)},
+ − 694
numpages = {11},
+ − 695
pages = {246--256},
+ − 696
title = {{T}he {I}mpact of {R}egular {E}xpression {D}enial of {S}ervice ({ReDoS}) in {P}ractice: {A}n {E}mpirical {S}tudy at the {E}cosystem {S}cale},
+ − 697
year = {2018}}