90 bdsk-url-1 = {https://doi.org/10.4204/EPTCS.151.7}} |
98 bdsk-url-1 = {https://doi.org/10.4204/EPTCS.151.7}} |
91 |
99 |
92 @inproceedings{Berglund18, |
100 @inproceedings{Berglund18, |
93 abstract = {Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.}, |
101 abstract = {Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.}, |
94 address = {Cham}, |
102 address = {Cham}, |
95 author = {M.~Berglund and W.~Bester and van der M.~Brink}, |
103 author = {M.~Berglund and W.~Bester and B.~Van Der Merwe}, |
96 booktitle = {Theoretical Aspects of Computing -- ICTAC 2018}, |
104 booktitle = {Theoretical Aspects of Computing -- ICTAC 2018}, |
97 editor = {Fischer, Bernd and Uustalu, Tarmo}, |
105 editor = {Fischer, Bernd and Uustalu, Tarmo}, |
98 isbn = {978-3-030-02508-3}, |
106 isbn = {978-3-030-02508-3}, |
99 pages = {99--115}, |
107 pages = {99--115}, |
100 publisher = {Springer International Publishing}, |
108 publisher = {Springer International Publishing}, |
154 year = {2012}, |
162 year = {2012}, |
155 bdsk-url-1 = {https://doi.org/10.1007/978-3-642-31653-1_8}} |
163 bdsk-url-1 = {https://doi.org/10.1007/978-3-642-31653-1_8}} |
156 |
164 |
157 @article{BERGLUND2022, |
165 @article{BERGLUND2022, |
158 abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.}, |
166 abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.}, |
159 author = {M.~Berglund and van der M.~Brink }, |
167 author = {M.~Berglund and B.~Van Der Merwe}, |
160 doi = {https://doi.org/10.1016/j.tcs.2022.10.041}, |
168 doi = {https://doi.org/10.1016/j.tcs.2022.10.041}, |
161 issn = {0304-3975}, |
169 issn = {0304-3975}, |
162 journal = {Theoretical Computer Science}, |
170 journal = {Theoretical Computer Science}, |
163 keywords = {Regular expressions, Backreferences}, |
171 keywords = {Regular expressions, Backreferences}, |
164 title = {Re-examining regular expressions with backreferences}, |
172 title = {Re-examining regular expressions with backreferences}, |
341 bdsk-url-1 = {http://infoscience.epfl.ch/record/287059}, |
349 bdsk-url-1 = {http://infoscience.epfl.ch/record/287059}, |
342 bdsk-url-2 = {https://doi.org/10.5075/epfl-thesis-7357}} |
350 bdsk-url-2 = {https://doi.org/10.5075/epfl-thesis-7357}} |
343 |
351 |
344 |
352 |
345 |
353 |
346 @inproceedings{Paulson2015,, |
|
347 author = {L.~Paulson}, |
|
348 year = {2015}, |
|
349 month = {05}, |
|
350 pages = {}, |
|
351 title = {A Formalisation of Finite Automata Using Hereditarily Finite Sets}, |
|
352 isbn = {978-3-319-21400-9}, |
|
353 doi = {10.1007/978-3-319-21401-6_15} |
|
354 } |
|
355 |
354 |
356 @inproceedings{Zippy2020, |
355 @inproceedings{Zippy2020, |
357 abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.}, |
356 abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.}, |
358 address = {New York, NY, USA}, |
357 address = {New York, NY, USA}, |
359 author = {R.~Edelmannand H.~Jad and K.~Viktor}, |
358 author = {R.~Edelmann and H.~Jad and K.~Viktor}, |
360 booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
359 booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
361 doi = {10.1145/3385412.3385992}, |
360 doi = {10.1145/3385412.3385992}, |
362 isbn = {9781450376136}, |
361 isbn = {9781450376136}, |
363 keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives}, |
362 keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives}, |
364 location = {London, UK}, |
363 location = {London, UK}, |
502 title = {Extending finite automata to efficiently match Perl-compatible regular expressions}, |
501 title = {Extending finite automata to efficiently match Perl-compatible regular expressions}, |
503 year = {2008}, |
502 year = {2008}, |
504 bdsk-url-1 = {https://doi.org/10.1145/1544012.1544037}} |
503 bdsk-url-1 = {https://doi.org/10.1145/1544012.1544037}} |
505 |
504 |
506 @book{Sakarovitch2009, |
505 @book{Sakarovitch2009, |
507 author = {Sakarovitch, Jacques}, |
506 author = {J.~Sakarovitch}, |
508 doi = {10.1017/CBO9781139195218}, |
507 doi = {10.1017/CBO9781139195218}, |
509 editor = {Thomas, ReubenTranslator}, |
508 editor = {Thomas, ReubenTranslator}, |
510 place = {Cambridge}, |
509 place = {Cambridge}, |
511 publisher = {Cambridge University Press}, |
510 publisher = {Cambridge University Press}, |
512 title = {Elements of Automata Theory}, |
511 title = {Elements of Automata Theory}, |
513 year = {2009}, |
512 year = {2009}, |
514 bdsk-url-1 = {https://doi.org/10.1017/CBO9781139195218}} |
513 bdsk-url-1 = {https://doi.org/10.1017/CBO9781139195218}} |
515 |
514 |
516 @unpublished{CSL2022, |
515 @unpublished{FoSSaCS2023, |
517 author = {Chengsong Tan and Christian Urban}, |
516 author = {C.~Tan and C.~Urban}, |
518 note = {submitted}, |
517 note = {submitted}, |
519 title = {POSIX Lexing with Bitcoded Derivatives}} |
518 title = {POSIX Lexing with Bitcoded Derivatives}} |
520 |
519 |
521 @inproceedings{Verbatim, |
520 @inproceedings{Verbatim, |
522 author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen}, |
521 author = {D.~Egolf and S.~Lasser and K.~Fisher}, |
523 booktitle = {2021 IEEE Security and Privacy Workshops (SPW)}, |
522 booktitle = {2021 IEEE Security and Privacy Workshops (SPW)}, |
524 doi = {10.1109/SPW53761.2021.00022}, |
523 doi = {10.1109/SPW53761.2021.00022}, |
525 pages = {92-100}, |
524 pages = {92-100}, |
526 title = {Verbatim: A Verified Lexer Generator}, |
525 title = {Verbatim: A Verified Lexer Generator}, |
527 year = {2021}, |
526 year = {2021}, |
549 |
548 |
550 |
549 |
551 @inproceedings{Verbatimpp, |
550 @inproceedings{Verbatimpp, |
552 abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.}, |
551 abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.}, |
553 address = {New York, NY, USA}, |
552 address = {New York, NY, USA}, |
554 author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen}, |
553 author = {D.~Egolf and S.~Lasser and K.~Fisher}, |
555 booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs}, |
554 booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs}, |
556 doi = {10.1145/3497775.3503694}, |
555 doi = {10.1145/3497775.3503694}, |
557 isbn = {9781450391825}, |
556 isbn = {9781450391825}, |
558 keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions}, |
557 keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions}, |
559 location = {Philadelphia, PA, USA}, |
558 location = {Philadelphia, PA, USA}, |
565 url = {https://doi.org/10.1145/3497775.3503694}, |
564 url = {https://doi.org/10.1145/3497775.3503694}, |
566 year = {2022}, |
565 year = {2022}, |
567 bdsk-url-1 = {https://doi.org/10.1145/3497775.3503694}} |
566 bdsk-url-1 = {https://doi.org/10.1145/3497775.3503694}} |
568 |
567 |
569 @article{Turo_ov__2020, |
568 @article{Turo_ov__2020, |
570 author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar}, |
569 author = {L.~Turo{\v{n}}ov{\'{a}} and L.~Hol{\'{\i}}k and O.~Leng{\'{a}}l and O.~Saarikivi and M.~Veanes and T.~Vojnar}, |
571 date-added = {2022-05-23 18:43:04 +0100}, |
570 date-added = {2022-05-23 18:43:04 +0100}, |
572 date-modified = {2022-05-23 18:43:04 +0100}, |
571 date-modified = {2022-05-23 18:43:04 +0100}, |
573 doi = {10.1145/3428286}, |
572 doi = {10.1145/3428286}, |
574 journal = {Proceedings of the {ACM} on Programming Languages}, |
573 journal = {Proceedings of the {ACM} on Programming Languages}, |
575 month = {nov}, |
574 month = {nov}, |
582 year = 2020, |
581 year = 2020, |
583 bdsk-url-1 = {https://doi.org/10.1145%2F3428286}, |
582 bdsk-url-1 = {https://doi.org/10.1145%2F3428286}, |
584 bdsk-url-2 = {https://doi.org/10.1145/3428286}} |
583 bdsk-url-2 = {https://doi.org/10.1145/3428286}} |
585 |
584 |
586 @article{Rathnayake2014StaticAF, |
585 @article{Rathnayake2014StaticAF, |
587 author = {Asiri Rathnayake and Hayo Thielecke}, |
586 author = {A.~Rathnayake and H.~Thielecke}, |
588 journal = {ArXiv}, |
587 journal = {ArXiv}, |
589 title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics}, |
588 title = {Static Analysis for Regular Expression Exponential Runtime via Substructural Logics}, |
590 volume = {abs/1405.7058}, |
589 volume = {abs/1405.7058}, |
591 year = {2014}} |
590 year = {2014}} |
592 |
591 |
593 @inproceedings{Weideman2017Static, |
592 @inproceedings{Weideman2017Static, |
594 author = {Nicolaas Weideman}, |
593 author = {N.~Weideman}, |
595 title = {Static analysis of regular expressions}, |
594 title = {Static analysis of regular expressions}, |
596 year = {2017}} |
595 year = {2017}} |
597 |
596 |
598 @inproceedings{RibeiroAgda2017, |
597 @inproceedings{RibeiroAgda2017, |
599 abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.}, |
598 abstract = {We describe the formalization of a regular expression (RE) parsing algorithm that produces a bit representation of its parse tree in the dependently typed language Agda. The algorithm computes bit-codes using Brzozowski derivatives and we prove that produced codes are equivalent to parse trees ensuring soundness and completeness w.r.t an inductive RE semantics. We include the certified algorithm in a tool developed by us, named verigrep, for regular expression based search in the style of the well known GNU grep. Practical experiments conducted with this tool are reported.}, |
600 address = {New York, NY, USA}, |
599 address = {New York, NY, USA}, |
601 articleno = {4}, |
600 articleno = {4}, |
602 author = {Ribeiro, Rodrigo and Bois, Andr\'{e} Du}, |
601 author = {R.~Ribeiro and A.~D.~Bois}, |
603 booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages}, |
602 booktitle = {Proceedings of the 21st Brazilian Symposium on Programming Languages}, |
604 date-modified = {2022-03-16 16:38:47 +0000}, |
603 date-modified = {2022-03-16 16:38:47 +0000}, |
605 doi = {10.1145/3125374.3125381}, |
604 doi = {10.1145/3125374.3125381}, |
606 isbn = {9781450353892}, |
605 isbn = {9781450353892}, |
607 keywords = {Certified algorithms, regular expressions, dependent types, bit-codes}, |
606 keywords = {Certified algorithms, regular expressions, dependent types, bit-codes}, |
656 title = {Bit-coded Regular Expression Parsing}, |
655 title = {Bit-coded Regular Expression Parsing}, |
657 year = {2011}, |
656 year = {2011}, |
658 bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}} |
657 bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgTWFjIChNYWNCb29rLVBybykvRGVza3RvcC9mcml0ei1wYXBlci5wZGZPEQF+AAAAAAF+AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8PZnJpdHotcGFwZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAADAAMAAAogY3UAAAAAAAAAAAAAAAAAB0Rlc2t0b3AAAAIAQi86VXNlcnM6Y3N0YW46RHJvcGJveDpNeSBNYWMgKE1hY0Jvb2stUHJvKTpEZXNrdG9wOmZyaXR6LXBhcGVyLnBkZgAOACAADwBmAHIAaQB0AHoALQBwAGEAcABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEBVc2Vycy9jc3Rhbi9Ecm9wYm94L015IE1hYyAoTWFjQm9vay1Qcm8pL0Rlc2t0b3AvZnJpdHotcGFwZXIucGRmABMAAS8AABUAAgAM//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}} |
659 |
658 |
660 @software{regexploit2021, |
659 @software{regexploit2021, |
661 author = {Ben Caller, Luca Carettoni}, |
660 author = {B.~Caller, L.~Carettoni}, |
662 date-added = {2020-11-24 00:00:00 +0000}, |
661 date-added = {2020-11-24 00:00:00 +0000}, |
663 date-modified = {2021-05-07 00:00:00 +0000}, |
662 date-modified = {2021-05-07 00:00:00 +0000}, |
664 keywords = {ReDos static analyser}, |
663 keywords = {ReDos static analyser}, |
665 month = {May}, |
664 month = {May}, |
666 title = {regexploit}, |
665 title = {regexploit}, |
667 url = {https://github.com/doyensec/regexploit}, |
666 url = {https://github.com/doyensec/regexploit}, |
668 year = {2021}, |
667 year = {2021}, |
669 bdsk-url-1 = {https://github.com/doyensec/regexploit}} |
668 bdsk-url-1 = {https://github.com/doyensec/regexploit}} |
670 |
669 |
671 @software{pcre, |
670 @software{pcre, |
672 author = {Philip Hazel}, |
671 author = {P.~Hazel}, |
673 date-added = {1997-01-01 00:00:00 +0000}, |
672 date-added = {1997-01-01 00:00:00 +0000}, |
674 date-modified = {2021-06-14 00:00:00 +0000}, |
673 date-modified = {2021-06-14 00:00:00 +0000}, |
675 keywords = {Perl Compatible Regular Expressions}, |
674 keywords = {Perl Compatible Regular Expressions}, |
676 month = {June}, |
675 month = {June}, |
677 title = {PCRE}, |
676 title = {PCRE}, |
684 note = {[Online; last accessed 19-November-2022]}, |
683 note = {[Online; last accessed 19-November-2022]}, |
685 title = {{Snort Community Rules}}, |
684 title = {{Snort Community Rules}}, |
686 year = {2022}} |
685 year = {2022}} |
687 |
686 |
688 @misc{KuklewiczHaskell, |
687 @misc{KuklewiczHaskell, |
689 author = {Kuklewicz}, |
688 author = {C.~Kuklewicz}, |
690 keywords = {Buggy C POSIX Lexing Libraries}, |
689 keywords = {Buggy C POSIX Lexing Libraries}, |
691 title = {Regex Posix}, |
690 title = {Regex Posix}, |
692 url = {https://wiki.haskell.org/Regex_Posix}, |
691 url = {https://wiki.haskell.org/Regex_Posix}, |
693 year = {2017}, |
692 year = {2017}, |
694 bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}} |
693 bdsk-url-1 = {https://wiki.haskell.org/Regex_Posix}} |
695 |
694 |
696 @misc{regex101, |
695 @misc{regex101, |
697 author = {Firas Dib}, |
696 author = {D.~Firas}, |
698 keywords = {regex tester debugger}, |
697 keywords = {regex tester debugger}, |
699 title = {regex101}, |
698 title = {regex101}, |
700 url = {https://regex101.com/}, |
699 url = {https://regex101.com/}, |
701 year = {2011}, |
700 year = {2011}, |
702 bdsk-url-1 = {https://regex101.com/}} |
701 bdsk-url-1 = {https://regex101.com/}} |
703 |
702 |
704 @misc{atomEditor, |
703 @misc{atomEditor, |
705 author = {Dunno}, |
704 author = {N.~Sobo}, |
706 keywords = {text editor}, |
705 keywords = {text editor}, |
707 title = {Atom Editor}, |
706 title = {Atom Editor}, |
708 url = {https://atom.io}, |
707 url = {https://atom.io}, |
709 year = {2022}, |
708 year = {2022}, |
710 bdsk-url-1 = {https://atom.io}} |
709 bdsk-url-1 = {https://atom.io}} |
711 |
710 |
712 @techreport{grathwohl2014crash, |
711 @techreport{grathwohl2014crash, |
713 author = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp}, |
712 author = {N.~B.~B.~Grathwohl and F.~Henglein and U.~T.~Rasmussen}, |
714 institution = {Technical report, University of Copenhagen}, |
713 institution = {Technical report, University of Copenhagen}, |
715 title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types}, |
714 title = {A Crash-Course in Regular Expression Parsing and Regular Expressions as Types}, |
716 year = {2014}} |
715 year = {2014}} |
717 |
716 |
718 @inproceedings{xml2015, |
717 @inproceedings{xml2015, |
719 abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.}, |
718 abstract = {Regular expressions are omnipresent in database applications. They form the structural core of schema languages for XML, they are a fundamental ingredient for navigational queries in graph databases, and are being considered in languages for upcoming technologies such as schema- and transformation languages for tabular data on the Web. In this paper we study the usage and effectiveness of the counting operator (or: limited repetition) in regular expressions. The counting operator is a popular extension which is part of the POSIX standard and therefore also present in regular expressions in grep, Java, Python, Perl, and Ruby. In a database context, expressions with counting appear in XML Schema and languages for querying graphs such as SPARQL 1.1 and Cypher.We first present a practical study that suggests that counters are extensively used in practice. We then investigate evaluation methods for such expressions and develop a new algorithm for efficient incremental evaluation. Finally, we conduct an extensive benchmark study that shows that exploiting counting operators can lead to speed-ups of several orders of magnitude in a wide range of settings: normal and incremental evaluation on synthetic and real expressions.}, |
720 address = {New York, NY, USA}, |
719 address = {New York, NY, USA}, |
721 author = {Bj\"{o}rklund, Henrik and Martens, Wim and Timm, Thomas}, |
720 author = {H~.Bj\"{o}rklund and W.~Martens and T.~Timm}, |
722 booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management}, |
721 booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management}, |
723 doi = {10.1145/2806416.2806434}, |
722 doi = {10.1145/2806416.2806434}, |
724 isbn = {9781450337946}, |
723 isbn = {9781450337946}, |
725 keywords = {regular expressions, schema, regular path queries, xml}, |
724 keywords = {regular expressions, schema, regular path queries, xml}, |
726 location = {Melbourne, Australia}, |
725 location = {Melbourne, Australia}, |