lexing: comparison ChengsongTanPhdThesis/example.bib

equal deleted inserted replaced

-:94db2636a296
+:7af4e2420a8c
 pages={3--41},
 year={1956},
 publisher={Princeton, NJ}
 }
+@inproceedings{Paulson2015,
+	author = {L.~C.~Paulson},
+	booktitle = {Proc.~of the 25th International Conference on Automated Deduction (CADE)},
+	pages = {231--245},
+	series = {LNAI},
+	title = {{A} {F}ormalisation of {F}inite {A}utomata {U}sing {H}ereditarily {F}inite {S}ets},
+	volume = {9195},
+	year = {2015}}
 @article{Murugesan2014,
 	author = {N.~Murugesan and O.~V.~Shanmuga Sundaram},
 	journal = {International Journal of Computer Trends and Technology},
 	bdsk-url-1 = {https://doi.org/10.4204/EPTCS.151.7}}
 @inproceedings{Berglund18,
 	abstract = {Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.},
 	address = {Cham},
-	author = {M.~Berglund and W.~Bester and van der M.~Brink},
+	author = {M.~Berglund and W.~Bester and B.~Van Der Merwe},
 	booktitle = {Theoretical Aspects of Computing -- ICTAC 2018},
 	editor = {Fischer, Bernd and Uustalu, Tarmo},
 	isbn = {978-3-030-02508-3},
 	pages = {99--115},
 	publisher = {Springer International Publishing},
 	year = {2012},
 	bdsk-url-1 = {https://doi.org/10.1007/978-3-642-31653-1_8}}
 @article{BERGLUND2022,
 	abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.},
-	author = {M.~Berglund and van der M.~Brink },
+	author = {M.~Berglund and B.~Van Der Merwe},
 	doi = {https://doi.org/10.1016/j.tcs.2022.10.041},
 	issn = {0304-3975},
 	journal = {Theoretical Computer Science},
 	keywords = {Regular expressions, Backreferences},
 	title = {Re-examining regular expressions with backreferences},
 	bdsk-url-1 = {http://infoscience.epfl.ch/record/287059},
 	bdsk-url-2 = {https://doi.org/10.5075/epfl-thesis-7357}}
-@inproceedings{Paulson2015,,
-author = {L.~Paulson},
-year = {2015},
-month = {05},
-pages = {},
-title = {A Formalisation of Finite Automata Using Hereditarily Finite Sets},
-isbn = {978-3-319-21400-9},
-doi = {10.1007/978-3-319-21401-6_15}
-}
 @inproceedings{Zippy2020,
 	abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.},
 	address = {New York, NY, USA},
-	author = {R.~Edelmannand H.~Jad and K.~Viktor},
+	author = {R.~Edelmann and H.~Jad and K.~Viktor},
 	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
 	doi = {10.1145/3385412.3385992},
 	isbn = {9781450376136},
 	keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives},
 	location = {London, UK},
 location = {San Antonio, Texas},
 series = {SSYM'98}
 }
 @INPROCEEDINGS{Sidhu2001,
-author={Sidhu, R. and Prasanna, V.K.},
+author={R.~Sidhu and V.K.~Prasanna},
 booktitle={The 9th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM'01)},
 title={Fast Regular Expression Matching Using FPGAs},
 year={2001},
 volume={},
 number={},
 	title = {Extending finite automata to efficiently match Perl-compatible regular expressions},
 	year = {2008},
 	bdsk-url-1 = {https://doi.org/10.1145/1544012.1544037}}
 @book{Sakarovitch2009,
-	author = {Sakarovitch, Jacques},
+	author = {J.~Sakarovitch},
 	doi = {10.1017/CBO9781139195218},
 	editor = {Thomas, ReubenTranslator},
 	place = {Cambridge},
 	publisher = {Cambridge University Press},
 	title = {Elements of Automata Theory},
 	year = {2009},
 	bdsk-url-1 = {https://doi.org/10.1017/CBO9781139195218}}
-@unpublished{CSL2022,
+@unpublished{FoSSaCS2023,
-	author = {Chengsong Tan and Christian Urban},
+	author = {C.~Tan and C.~Urban},
 	note = {submitted},
 	title = {POSIX Lexing with Bitcoded Derivatives}}
 @inproceedings{Verbatim,
-	author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
+	author = {D.~Egolf and S.~Lasser and K.~Fisher},
 	booktitle = {2021 IEEE Security and Privacy Workshops (SPW)},
 	doi = {10.1109/SPW53761.2021.00022},
 	pages = {92-100},
 	title = {Verbatim: A Verified Lexer Generator},
 	year = {2021},
 @inproceedings{Verbatimpp,
 	abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
 	address = {New York, NY, USA},
-	author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
+	author = {D.~Egolf and S.~Lasser and K.~Fisher},
 	booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
 	doi = {10.1145/3497775.3503694},
 	isbn = {9781450391825},
 	keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
 	location = {Philadelphia, PA, USA},
 	url = {https://doi.org/10.1145/3497775.3503694},
 	year = {2022},
 	bdsk-url-1 = {https://doi.org/10.1145/3497775.3503694}}
 @article{Turo_ov__2020,
-	author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar},
+	author = {L.~Turo{\v{n}}ov{\'{a}} and L.~Hol{\'{\i}}k and  O.~Leng{\'{a}}l and O.~Saarikivi and M.~Veanes and T.~Vojnar},
 	date-added = {2022-05-23 18:43:04 +0100},
 	date-modified = {2022-05-23 18:43:04 +0100},
 	doi = {10.1145/3428286},
 	journal = {Proceedings of the {ACM} on Programming Languages},
 	month = {nov},
 	year = 2020,
 	bdsk-url-1 = {https://doi.org/10.1145%2F3428286},
 	bdsk-url-2 = {https://doi.org/10.1145/3428286}}
 @article{Rathnayake2014StaticAF,
-	author = {Asiri Rathnayake and Hayo Thielecke},
+	author = {A.~Rathnayake and H.~Thielecke},
 	journal = {ArXiv},
 	title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics},
 	volume = {abs/1405.7058},
 	year = {2014}}
 @inproceedings{Weideman2017Static,
-	author = {Nicolaas Weideman},
+	author = {N.~Weideman},
 	title = {Static analysis of regular expressions},
 	year = {2017}}
 @inproceedings{RibeiroAgda2017,
 	abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.},
 	address = {New York, NY, USA},
 	articleno = {4},
-	author = {Ribeiro, Rodrigo and Bois, Andr\'{e} Du},
+	author = {R.~Ribeiro and A.~D.~Bois},
 	booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages},
 	date-modified = {2022-03-16 16:38:47 +0000},
 	doi = {10.1145/3125374.3125381},
 	isbn = {9781450353892},
 	keywords = {Certified algorithms, regular expressions, dependent types, bit-codes},
 	title = {Bit-coded Regular Expression Parsing},
 	year = {2011},
 	bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
 @software{regexploit2021,
-	author = {Ben Caller, Luca Carettoni},
+	author = {B.~Caller, L.~Carettoni},
 	date-added = {2020-11-24 00:00:00 +0000},
 	date-modified = {2021-05-07 00:00:00 +0000},
 	keywords = {ReDos static analyser},
 	month = {May},
 	title = {regexploit},
 	url = {https://github.com/doyensec/regexploit},
 	year = {2021},
 	bdsk-url-1 = {https://github.com/doyensec/regexploit}}
 @software{pcre,
-	author = {Philip Hazel},
+	author = {P.~Hazel},
 	date-added = {1997-01-01 00:00:00 +0000},
 	date-modified = {2021-06-14 00:00:00 +0000},
 	keywords = {Perl Compatible Regular Expressions},
 	month = {June},
 	title = {PCRE},
 	note = {[Online; last accessed 19-November-2022]},
 	title = {{Snort Community Rules}},
 	year = {2022}}
 @misc{KuklewiczHaskell,
-	author = {Kuklewicz},
+	author = {C.~Kuklewicz},
 	keywords = {Buggy C POSIX Lexing Libraries},
 	title = {Regex Posix},
 	url = {https://wiki.haskell.org/Regex_Posix},
 	year = {2017},
 	bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}}
 @misc{regex101,
-	author = {Firas Dib},
+	author = {D.~Firas},
 	keywords = {regex tester debugger},
 	title = {regex101},
 	url = {https://regex101.com/},
 	year = {2011},
 	bdsk-url-1 = {https://regex101.com/}}
 @misc{atomEditor,
-	author = {Dunno},
+	author = {N.~Sobo},
 	keywords = {text editor},
 	title = {Atom Editor},
 	url = {https://atom.io},
 	year = {2022},
 	bdsk-url-1 = {https://atom.io}}
 @techreport{grathwohl2014crash,
-	author = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp},
+	author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen},
 	institution = {Technical report, University of Copenhagen},
 	title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types},
 	year = {2014}}
 @inproceedings{xml2015,
 	abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.},
 	address = {New York, NY, USA},
-	author = {Bj\"{o}rklund, Henrik and Martens, Wim and Timm, Thomas},
+	author = {H~.Bj\"{o}rklund and W.~Martens and T.~Timm},
 	booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management},
 	doi = {10.1145/2806416.2806434},
 	isbn = {9781450337946},
 	keywords = {regular expressions, schema, regular path queries, xml},
 	location = {Melbourne, Australia},
 	pages = {107--115},
 	title = {{F}ormal {V}erification of a {R}ealistic {C}ompiler},
 	volume = 52,
 	year = 2009}
-@inproceedings{Paulson2015,
-	author = {L.~C.~Paulson},
-	booktitle = {Proc.~of the 25th International Conference on Automated Deduction (CADE)},
-	pages = {231--245},
-	series = {LNAI},
-	title = {{A} {F}ormalisation of {F}inite {A}utomata {U}sing {H}ereditarily {F}inite {S}ets},
-	volume = {9195},
-	year = {2015}}
 @article{Wu2014,
 	author = {C.~Wu and X.~Zhang and C.~Urban},
 	journal = {Journal of Automatic Reasoning},
 	number = {4},

changeset 628	7af4e2420a8c
parent 627	94db2636a296
child 634	b079aaee5e10