%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/


%% Created for CS TAN at 2022-11-20 17:26:32 +0000


%% Saved with string encoding Unicode (UTF-8)

@inproceedings{Doczkal2013,
	author = {Doczkal, Christian and Kaiser, Jan-Oliver and Smolka, Gert},
	title = {A Constructive Theory of Regular Languages in Coq},
	year = {2013},
	isbn = {9783319035444},
	publisher = {Springer-Verlag},
	address = {Berlin, Heidelberg},
	url = {https://doi.org/10.1007/978-3-319-03545-1_6},
	doi = {10.1007/978-3-319-03545-1_6},
	abstract = {We present a formal constructive theory of regular languages consisting of about 1400 lines of Coq/Ssreflect. As representations we consider regular expressions, deterministic and nondeterministic automata, and Myhill and Nerode partitions. We construct computable functions translating between these representations and show that equivalence of representations is decidable. We also establish the usual closure properties, give a minimization algorithm for DFAs, and prove that minimal DFAs are unique up to state renaming. Our development profits much from Ssreflect's support for finite types and graphs.},
	booktitle = {Proceedings of the Third International Conference on Certified Programs and Proofs - Volume 8307},
	pages = {82--97},
	numpages = {16},
	keywords = {finite automata, regular expressions, Myhill-Nerode, Ssreflect, Coq, regular languages}}

@article{Krauss2012,
	author = {Alexander Krauss and Tobias Nipkow},
	title = {Proof Pearl: Regular Expression Equivalence and Relation Algebra},
	journal = {J. Automated Reasoning},
	volume = {49},
	pages = {95--106},
	year = {2012},
	note = {published online March 2011}}

@article{kleene1956,
	title = {Representation of events in nerve nets and finite automata},
	author = {S.C.~Kleene and others},
	journal = {Automata studies},
	volume = {34},
	pages = {3--41},
	year = {1956},
	publisher = {Princeton University Press},
	address = {Princeton, NJ}}

@inproceedings{Sailesh2006,
	author = {S.~Kumar and S.~Dharmapurikar and F.~Yu and P.~Crowley and J.~Turner},
	title = {Algorithms to Accelerate Multiple Regular Expressions Matching for Deep Packet Inspection},
	year = {2006},
	isbn = {1595933085},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	url = {https://doi.org/10.1145/1159913.1159952},
	doi = {10.1145/1159913.1159952},
	abstract = {There is a growing demand for network devices capable of examining the content of data packets in order to improve network security and provide application-specific services. Most high performance systems that perform deep packet inspection implement simple string matching algorithms to match packets against a large, but finite set of strings. However, there is growing interest in the use of regular expression-based pattern matching, since regular expressions offer superior expressive power and flexibility. Deterministic finite automata (DFA) representations are typically used to implement regular expressions. However, DFA representations of regular expression sets arising in network applications require large amounts of memory, limiting their practical application. In this paper, we introduce a new representation for regular expressions, called the Delayed Input DFA (D2FA), which substantially reduces space requirements as compared to a DFA. A D2FA is constructed by transforming a DFA via incrementally replacing several transitions of the automaton with a single default transition. Our approach dramatically reduces the number of distinct transitions between states. For a collection of regular expressions drawn from current commercial and academic systems, a D2FA representation reduces transitions by more than 95%. Given the substantially reduced space requirements, we describe an efficient architecture that can perform deep packet inspection at multi-gigabit rates. Our architecture uses multiple on-chip memories in such a way that each remains uniformly occupied and accessed over a short duration, thus effectively distributing the load and enabling high throughput. Our architecture can provide cost-effective packet content scanning at OC-192 rates with memory requirements that are consistent with current ASIC technology.},
	booktitle = {Proceedings of the 2006 Conference on Applications, Technologies, Architectures, and Protocols for Computer Communications},
	pages = {339--350},
	numpages = {12},
	keywords = {deep packet inspection, regular expressions, DFA},
	location = {Pisa, Italy},
	series = {SIGCOMM '06}}
|
@article{Murugesan2014,
	author = {N.~Murugesan and O.~V.~Shanmuga Sundaram},
	journal = {International Journal of Computer Trends and Technology},
	number = {1},
	pages = {29--33},
	title = {{S}ome {P}roperties of {B}rzozowski {D}erivatives of {R}egular {E}xpressions},
	url = {http://arxiv.org/abs/1407.5902},
	volume = {13},
	year = {2014},
	bdsk-url-1 = {http://arxiv.org/abs/1407.5902}}
|
@phdthesis{Ausaf,
	author = {F.~Ausaf},
	school = {King's College London},
	title = {{V}erified {L}exing and {P}arsing},
	year = {2018}}
|
@inproceedings{Okui10,
	abstract = {This paper offers a new efficient regular expression matching algorithm which follows the POSIX-type leftmost-longest rule. The algorithm basically emulates the subset construction without backtracking, so that its computational cost even in the worst case does not explode exponentially; the time complexity of the algorithm is O(mn(n{\thinspace}+{\thinspace}c)), where m is the length of a given input string, n the number of occurrences of the most frequently used letter in a given regular expression and c the number of subexpressions to be used for capturing substrings. A formalization of the leftmost-longest semantics by using parse trees is also discussed.},
	address = {Berlin, Heidelberg},
	author = {S.~Okui and T.~Suzuki},
	booktitle = {Implementation and Application of Automata},
	editor = {Domaratzki, Michael and Salomaa, Kai},
	isbn = {978-3-642-18098-9},
	pages = {231--240},
	publisher = {Springer Berlin Heidelberg},
	title = {Disambiguation in Regular Expression Matching via Position Automata with Augmented Transitions},
	year = {2011}}
|
@article{Berglund14,
	author = {M.~Berglund and F.~Drewes and B.~{van der Merwe}},
	doi = {10.4204/EPTCS.151.7},
	journal = {Electronic Proceedings in Theoretical Computer Science},
	month = {05},
	title = {Analyzing Catastrophic Backtracking Behavior in Practical Regular Expression Matching},
	volume = {151},
	year = {2014},
	bdsk-url-1 = {https://doi.org/10.4204/EPTCS.151.7}}
|
@inproceedings{Berglund18,
	abstract = {Whereas Perl-compatible regular expression matchers typically exhibit some variation of leftmost-greedy semantics, those conforming to the posix standard are prescribed leftmost-longest semantics. However, the posix standard leaves some room for interpretation, and Fowler and Kuklewicz have done experimental work to confirm differences between various posix matchers. The Boost library has an interesting take on the posix standard, where it maximises the leftmost match not with respect to subexpressions of the regular expression pattern, but rather, with respect to capturing groups. In our work, we provide the first formalisation of Boost semantics, and we analyse the complexity of regular expression matching when using Boost semantics.},
	address = {Cham},
	author = {M.~Berglund and Bester, Willem and van der Merwe, Brink},
	booktitle = {Theoretical Aspects of Computing -- ICTAC 2018},
	editor = {Fischer, Bernd and Uustalu, Tarmo},
	isbn = {978-3-030-02508-3},
	pages = {99--115},
	publisher = {Springer International Publishing},
	title = {Formalising Boost POSIX Regular Expression Matching},
	year = {2018}}
|
@inproceedings{Chen12,
	author = {Chen, Haiming and Yu, Sheng},
	doi = {10.1007/978-3-642-27654-5_27},
	month = {01},
	pages = {343--356},
	title = {Derivatives of Regular Expressions and an Application},
	volume = {7160},
	year = {2012},
	bdsk-url-1 = {https://doi.org/10.1007/978-3-642-27654-5_27}}
|
@article{Takayuki2019,
	author = {Takayuki Miyazaki and Yasuhiko Minamide},
	doi = {10.2197/ipsjjip.27.422},
	journal = {Journal of Information Processing},
	pages = {422--430},
	title = {Derivatives of Regular Expressions with Lookahead},
	volume = {27},
	year = {2019},
	bdsk-url-1 = {https://doi.org/10.2197/ipsjjip.27.422}}
|
@article{FERNAU2015287,
	abstract = {A pattern α, i.e., a string that contains variables and terminals, matches a terminal word w if w can be obtained by uniformly substituting the variables of α by terminal words. Deciding whether a given terminal word matches a given pattern is NP-complete and this holds for several natural variants of the problem that result from whether or not variables can be erased, whether or not the patterns are required to be terminal-free or whether or not the mapping of variables to terminal words must be injective. We consider numerous parameters of this problem (i.e., number of variables, length of w, length of the words substituted for variables, number of occurrences per variable, cardinality of the terminal alphabet) and for all possible combinations of the parameters (and variants described above), we answer the question whether or not the problem is still NP-complete if these parameters are bounded by constants.},
	author = {H.~Fernau and M.L.~Schmid},
	doi = {10.1016/j.ic.2015.03.006},
	issn = {0890-5401},
	journal = {Information and Computation},
	keywords = {Parameterised pattern matching, Function matching, NP-completeness, Membership problem for pattern languages, Morphisms},
	pages = {287--305},
	title = {Pattern matching with variables: A multivariate complexity analysis},
	url = {https://www.sciencedirect.com/science/article/pii/S0890540115000218},
	volume = {242},
	year = {2015},
	bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0890540115000218},
	bdsk-url-2 = {https://doi.org/10.1016/j.ic.2015.03.006}}
|
@inproceedings{Schmid2012,
	abstract = {We study different possibilities of combining the concept of homomorphic replacement with regular expressions in order to investigate the class of languages given by extended regular expressions with backreferences (REGEX). It is shown in which regard existing and natural ways to do this fail to reach the expressive power of REGEX. Furthermore, the complexity of the membership problem for REGEX with a bounded number of backreferences is considered.},
	address = {Berlin, Heidelberg},
	author = {M.L.~Schmid},
	booktitle = {Proceedings of the 16th International Conference on Developments in Language Theory},
	doi = {10.1007/978-3-642-31653-1_8},
	isbn = {9783642316524},
	keywords = {extended regular expressions, pattern languages, REGEX, pattern expressions, homomorphic replacement},
	location = {Taipei, Taiwan},
	numpages = {12},
	pages = {73--84},
	publisher = {Springer-Verlag},
	series = {DLT'12},
	title = {Inside the Class of REGEX Languages},
	url = {https://doi.org/10.1007/978-3-642-31653-1_8},
	year = {2012},
	bdsk-url-1 = {https://doi.org/10.1007/978-3-642-31653-1_8}}
|
@article{BERGLUND2022,
	abstract = {Most modern regular expression matching libraries (one of the rare exceptions being Google's RE2) allow backreferences, operations which bind a substring to a variable, allowing it to be matched again verbatim. However, both real-world implementations and definitions in the literature use different syntactic restrictions and have differences in the semantics of the matching of backreferences. Our aim is to compare these various flavors by considering the classes of formal languages that each can describe, establishing, as a result, a hierarchy of language classes. Beyond the hierarchy itself, some complexity results are given, and as part of the effort on comparing language classes new pumping lemmas are established, old classes are extended to new ones, and several incidental results on the nature of these language classes are given.},
	author = {Martin Berglund and Brink {van der Merwe}},
	doi = {10.1016/j.tcs.2022.10.041},
	issn = {0304-3975},
	journal = {Theoretical Computer Science},
	keywords = {Regular expressions, Backreferences},
	title = {Re-examining regular expressions with backreferences},
	url = {https://www.sciencedirect.com/science/article/pii/S0304397522006570},
	year = {2022},
	bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397522006570},
	bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2022.10.041}}

@article{FREYDENBERGER20191,
	abstract = {Most modern libraries for regular expression matching allow back-references (i.e., repetition operators) that substantially increase expressive power, but also lead to intractability. In order to find a better balance between expressiveness and tractability, we combine these with the notion of determinism for regular expressions used in XML DTDs and XML Schema. This includes the definition of a suitable automaton model, and a generalization of the Glushkov construction. We demonstrate that, compared to their non-deterministic superclass, these deterministic regular expressions with back-references have desirable algorithmic properties (i.e., efficiently solvable membership problem and some decidable problems in static analysis), while, at the same time, their expressive power exceeds that of deterministic regular expressions without back-references.},
	author = {Dominik D. Freydenberger and Markus L. Schmid},
	doi = {10.1016/j.jcss.2019.04.001},
	issn = {0022-0000},
	journal = {Journal of Computer and System Sciences},
	keywords = {Deterministic regular expression, Regex, Glushkov automaton},
	pages = {1--39},
	title = {Deterministic regular expressions with back-references},
	url = {https://www.sciencedirect.com/science/article/pii/S0022000018301818},
	volume = {105},
	year = {2019},
	bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0022000018301818},
	bdsk-url-2 = {https://doi.org/10.1016/j.jcss.2019.04.001}}

@inproceedings{Frey2013,
	address = {Dagstuhl, Germany},
	annote = {Keywords: extended regular expressions, regex, decidability, non-recursive tradeoffs},
	author = {Dominik D. Freydenberger},
	booktitle = {28th International Symposium on Theoretical Aspects of Computer Science (STACS 2011)},
	doi = {10.4230/LIPIcs.STACS.2011.507},
	editor = {Thomas Schwentick and Christoph D{\"u}rr},
	isbn = {978-3-939897-25-5},
	issn = {1868-8969},
	pages = {507--518},
	publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
	series = {Leibniz International Proceedings in Informatics (LIPIcs)},
	title = {{Extended Regular Expressions: Succinctness and Decidability}},
	url = {http://drops.dagstuhl.de/opus/volltexte/2011/3039},
	urn = {urn:nbn:de:0030-drops-30396},
	volume = {9},
	year = {2011},
	bdsk-url-1 = {http://drops.dagstuhl.de/opus/volltexte/2011/3039},
	bdsk-url-2 = {https://doi.org/10.4230/LIPIcs.STACS.2011.507}}

%% -------------------------- campeanu related
@article{campeanu2003formal,
	author = {C.~C{\^a}mpeanu and K.~Salomaa and S.~Yu},
	journal = {International Journal of Foundations of Computer Science},
	number = {06},
	pages = {1007--1018},
	title = {A Formal Study of Practical Regular Expressions},
	volume = {14},
	year = {2003}}

%% (truncated entry removed: only its tail survived -- Theoretical Computer Science, volume 410, year 2009, doi 10.1016/j.tcs.2009.02.022)

@incollection{Aho1990,
	abstract = {This chapter discusses the algorithms for solving string-matching problems that have proven useful for text-editing and text-processing applications. String pattern matching is an important problem that occurs in many areas of science and information processing. In computing, it occurs naturally as part of data processing, text editing, term rewriting, lexical analysis, and information retrieval. Many text editors and programming languages have facilities for matching strings. In biology, string-matching problems arise in the analysis of nucleic acids and protein sequences, and in the investigation of molecular phylogeny. String matching is also one of the central and most widely studied problems in theoretical computer science. The simplest form of the problem is to locate an occurrence of a keyword as a substring in a sequence of characters, which is called the input string. For example, the input string queueing contains the keyword ueuei as a substring. Even for this problem, several innovative, theoretically interesting algorithms have been devised that run significantly faster than the obvious brute-force method.},
	address = {Amsterdam},
	author = {A.V.~Aho},
	booktitle = {Algorithms and Complexity},
	doi = {10.1016/B978-0-444-88071-0.50010-2},
	editor = {Jan {van Leeuwen}},
	isbn = {978-0-444-88071-0},
	pages = {255--300},
	publisher = {Elsevier},
	series = {Handbook of Theoretical Computer Science},
	title = {Chapter 5 - Algorithms for Finding Patterns in Strings},
	url = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102},
	year = {1990},
	bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102},
	bdsk-url-2 = {https://doi.org/10.1016/B978-0-444-88071-0.50010-2}}
|
@inproceedings{Might2011,
	abstract = {We present a functional approach to parsing unrestricted context-free grammars based on Brzozowski's derivative of regular expressions. If we consider context-free grammars as recursive regular expressions, Brzozowski's equational theory extends without modification to context-free grammars (and it generalizes to parser combinators). The supporting actors in this story are three concepts familiar to functional programmers - laziness, memoization and fixed points; these allow Brzozowski's original equations to be transliterated into purely functional code in about 30 lines spread over three functions. Yet, this almost impossibly brief implementation has a drawback: its performance is sour - in both theory and practice. The culprit? Each derivative can double the size of a grammar, and with it, the cost of the next derivative. Fortunately, much of the new structure inflicted by the derivative is either dead on arrival, or it dies after the very next derivative. To eliminate it, we once again exploit laziness and memoization to transliterate an equational theory that prunes such debris into working code. Thanks to this compaction, parsing times become reasonable in practice. We equip the functional programmer with two equational theories that, when combined, make for an abbreviated understanding and implementation of a system for parsing context-free languages.},
	address = {New York, NY, USA},
	author = {M.~Might and D.~Darais and D.~Spiewak},
	booktitle = {Proceedings of the 16th ACM SIGPLAN International Conference on Functional Programming},
	doi = {10.1145/2034773.2034801},
	isbn = {9781450308656},
	keywords = {derivative, parsing, context-free grammar, parser combinator, formal languages, regular expressions},
	location = {Tokyo, Japan},
	numpages = {7},
	pages = {189--195},
	publisher = {Association for Computing Machinery},
	series = {ICFP '11},
	title = {Parsing with Derivatives: A Functional Pearl},
	url = {https://doi.org/10.1145/2034773.2034801},
	year = {2011},
	bdsk-url-1 = {https://doi.org/10.1145/2034773.2034801}}
|
@article{10.1145/2034574.2034801,
	abstract = {We present a functional approach to parsing unrestricted context-free grammars based on Brzozowski's derivative of regular expressions. If we consider context-free grammars as recursive regular expressions, Brzozowski's equational theory extends without modification to context-free grammars (and it generalizes to parser combinators). The supporting actors in this story are three concepts familiar to functional programmers - laziness, memoization and fixed points; these allow Brzozowski's original equations to be transliterated into purely functional code in about 30 lines spread over three functions. Yet, this almost impossibly brief implementation has a drawback: its performance is sour - in both theory and practice. The culprit? Each derivative can double the size of a grammar, and with it, the cost of the next derivative. Fortunately, much of the new structure inflicted by the derivative is either dead on arrival, or it dies after the very next derivative. To eliminate it, we once again exploit laziness and memoization to transliterate an equational theory that prunes such debris into working code. Thanks to this compaction, parsing times become reasonable in practice. We equip the functional programmer with two equational theories that, when combined, make for an abbreviated understanding and implementation of a system for parsing context-free languages.},
	address = {New York, NY, USA},
	author = {Might, Matthew and Darais, David and Spiewak, Daniel},
	doi = {10.1145/2034574.2034801},
	issn = {0362-1340},
	issue_date = {September 2011},
	journal = {SIGPLAN Not.},
	keywords = {formal languages, derivative, context-free grammar, regular expressions, parsing, parser combinator},
	month = {sep},
	number = {9},
	numpages = {7},
	pages = {189--195},
	publisher = {Association for Computing Machinery},
	title = {Parsing with Derivatives: A Functional Pearl},
	url = {https://doi.org/10.1145/2034574.2034801},
	volume = {46},
	year = {2011},
	bdsk-url-1 = {https://doi.org/10.1145/2034574.2034801}}
|
@inproceedings{Adams2016,
	author = {Adams, Michael D. and Hollenbeck, Celeste and Might, Matthew},
	booktitle = {Proceedings of the 37th ACM SIGPLAN Conference on Programming Language Design and Implementation},
	pages = {224--236},
	title = {On the complexity and performance of parsing with derivatives},
	year = {2016}}

%%----------------------------------------------------------------
%%----------------------------------------zippers

@article{Darragh2020,
	abstract = {Parsing with Derivatives (PwD) is an elegant approach to parsing context-free grammars (CFGs). It takes the equational theory behind Brzozowski's derivative for regular expressions and augments that theory with laziness, memoization, and fixed points. The result is a simple parser for arbitrary CFGs. Although recent work improved the performance of PwD, it remains inefficient due to the algorithm repeatedly traversing some parts of the grammar. In this functional pearl, we show how to avoid this inefficiency by suspending the state of the traversal in a zipper. When subsequent derivatives are taken, we can resume the traversal from where we left off without retraversing already traversed parts of the grammar. However, the original zipper is designed for use with trees, and we want to parse CFGs. CFGs can include shared regions, cycles, and choices between alternates, which makes them incompatible with the traditional tree model for zippers. This paper develops a generalization of zippers to properly handle these additional features. Just as PwD generalized Brzozowski's derivatives from regular expressions to CFGs, we generalize Huet's zippers from trees to CFGs. The resulting parsing algorithm is concise and efficient: it takes only 31 lines of OCaml code to implement the derivative function but performs 6,500 times faster than the original PwD and 3.24 times faster than the optimized implementation of PwD.},
	address = {New York, NY, USA},
	articleno = {108},
	author = {Darragh, Pierce and Adams, Michael D.},
	doi = {10.1145/3408990},
	issue_date = {August 2020},
	journal = {Proc. ACM Program. Lang.},
	keywords = {Derivatives, Zippers, Parsing with Derivatives, Parsing},
	month = {aug},
	number = {ICFP},
	numpages = {28},
	publisher = {Association for Computing Machinery},
	title = {Parsing with Zippers (Functional Pearl)},
	url = {https://doi.org/10.1145/3408990},
	volume = {4},
	year = {2020},
	bdsk-url-1 = {https://doi.org/10.1145/3408990}}

@phdthesis{Edelmann2021,
	abstract = {Parsing is the process that enables a computer system to make sense of raw data. Parsing is common to almost all computer systems: It is involved every time sequential data is read and elaborated into structured data. The theory of parsing usually focuses on the binary recognition aspect of parsing and eschews this essential data-elaboration aspect. In this thesis, I present a declarative framework for value-aware parsing that explicitly integrates data elaboration.
Within the framework of the thesis, I present parsing algorithms that are based on the concept of Brzozowski's derivatives. Derivative-based parsing algorithms present several advantages: they are elegant, amenable to formal reasoning, and easy to implement. Unfortunately, the performance of these algorithms in practice is often not competitive with other approaches. In this thesis, I show a general technique inspired by Huet's Zipper to greatly enhance the performance of derivative-based algorithms, and I do so without compromising their elegance, amenability to formal reasoning, or ease of implementation.
First, I present a technique for building efficient tokenisers that is based on Brzozowski's derivatives and Huet's zipper and that does not require the usual burdensome explicit conversion to automata. I prove the technique is correct in Coq and present SILEX, a Scala lexing library based on the technique. I demonstrate that the approach is competitive with state-of-the-art solutions.
Then, I present a characterisation of LL(1) languages based on the concept of should-not-follow sets. I present an algorithm for parsing LL(1) languages with derivatives and zippers. I show a formal proof of the algorithm's correctness and prove its worst-case linear-time complexity. I show how the LL(1) parsing with derivatives and zippers algorithm corresponds to the traditional LL(1) parsing algorithm.
I then present SCALL1ON, a Scala parsing combinators library for LL(1) languages that incorporates the LL(1) parsing with derivatives and zippers algorithm. I present an expressive and familiar combinator-based interface for describing LL(1) languages. I present techniques that help precisely locate LL(1) conflicts in user code. I discuss several advantages of the parsing with derivatives approach within the context of a parsing library. I also present SCALL1ON's enumeration and pretty-printing features and discuss their implementation. Through a series of benchmarks, I demonstrate the good performance and practicality of the approach. Finally, I present how to adapt the LL(1) parsing with derivatives and zippers algorithm to support arbitrary context-free languages. I show how the adapted algorithm corresponds to general parsing algorithms, such as Earley's parsing algorithm.},
	address = {Lausanne},
	author = {Edelmann, Romain},
	doi = {10.5075/epfl-thesis-7357},
	institution = {IINFCOM},
	pages = {246},
	school = {EPFL},
	title = {Efficient Parsing with Derivatives and Zippers},
	url = {http://infoscience.epfl.ch/record/287059},
	year = {2021},
	bdsk-url-1 = {http://infoscience.epfl.ch/record/287059},
	bdsk-url-2 = {https://doi.org/10.5075/epfl-thesis-7357}}

@inproceedings{Zippy2020,
	abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.},
	address = {New York, NY, USA},
	author = {Edelmann, Romain and Hamza, Jad and Kun\v{c}ak, Viktor},
	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
	doi = {10.1145/3385412.3385992},
	isbn = {9781450376136},
	keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives},
	location = {London, UK},
	numpages = {16},
	pages = {1036--1051},
	publisher = {Association for Computing Machinery},
	series = {PLDI 2020},
	title = {Zippy LL(1) Parsing with Derivatives},
	url = {https://doi.org/10.1145/3385412.3385992},
	year = {2020},
	bdsk-url-1 = {https://doi.org/10.1145/3385412.3385992}}

%%----------------------------------------zippers
%%----------------------------------------------------------------

@article{fowler2003,
	author = {Fowler, Glenn},
	journal = {URL: https://web.archive.org/web/20050408073627/http://www.research.att.com/\~{}gsf/testregex/re-interpretation.html},
	title = {An interpretation of the POSIX regex standard},
	year = {2003}}

@inproceedings{Snort1999,
	abstract = {Network intrusion detection systems (NIDS) are an important part of any network security architecture. They provide a layer of defense which monitors network traffic for predefined suspicious activity or patterns, and alert system administrators when potential hostile traffic is detected. Commercial NIDS have many differences, but Information Systems departments must face the commonalities that they share such as significant system footprint, complex deployment and high monetary cost. Snort was designed to address these issues.},
	address = {USA},
	author = {M.~Roesch},
	booktitle = {Proceedings of the 13th USENIX Conference on System Administration},
	location = {Seattle, Washington},
	numpages = {10},
	pages = {229--238},
	publisher = {USENIX Association},
	series = {LISA '99},
	title = {Snort - Lightweight Intrusion Detection for Networks},
	year = {1999}}

@inproceedings{Bro,
	abstract = {We describe Bro, a stand-alone system for detecting network intruders in real-time by passively monitoring a network link over which the intruder's traffic transits. We give an overview of the system's design, which emphasizes high-speed (FDDI-rate) monitoring, real-time notification, clear separation between mechanism and policy, and extensibility. To achieve these ends, Bro is divided into an "event engine" that reduces a kernel-filtered network traffic stream into a series of higher-level events, and a "policy script interpreter" that interprets event handlers written in a specialized language used to express a site's security policy. Event handlers can update state information, synthesize new events, record information to disk, and generate real-time notifications via syslog. We also discuss a number of attacks that attempt to subvert passive monitoring systems and defenses against these, and give particulars of how Bro analyzes the four applications integrated into it so far: Finger, FTP, Portmapper and Telnet. The system is publicly available in source code form.},
	address = {USA},
	author = {V.~Paxson},
	booktitle = {Proceedings of the 7th Conference on USENIX Security Symposium - Volume 7},
	location = {San Antonio, Texas},
	numpages = {1},
	pages = {3},
	publisher = {USENIX Association},
	series = {SSYM'98},
	title = {Bro: A System for Detecting Network Intruders in Real-Time},
	year = {1998}}
|
@inproceedings{Becchi08,
	author = {Becchi, Michela and Crowley, Patrick},
	doi = {10.1145/1544012.1544037},
	month = {01},
	pages = {25},
	title = {Extending finite automata to efficiently match Perl-compatible regular expressions},
	year = {2008},
	bdsk-url-1 = {https://doi.org/10.1145/1544012.1544037}}
|
@book{Sakarovitch2009,
	author = {Sakarovitch, Jacques},
	doi = {10.1017/CBO9781139195218},
	editor = {Thomas, Reuben},
	place = {Cambridge},
	publisher = {Cambridge University Press},
	title = {Elements of Automata Theory},
	year = {2009},
	bdsk-url-1 = {https://doi.org/10.1017/CBO9781139195218}}
|
@unpublished{CSL2022,
	author = {Chengsong Tan and Christian Urban},
	note = {submitted},
	title = {POSIX Lexing with Bitcoded Derivatives}}
|
@inproceedings{Verbatim,
	author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
	booktitle = {2021 IEEE Security and Privacy Workshops (SPW)},
	doi = {10.1109/SPW53761.2021.00022},
	pages = {92--100},
	title = {Verbatim: A Verified Lexer Generator},
	year = {2021},
	bdsk-url-1 = {https://doi.org/10.1109/SPW53761.2021.00022}}

@inproceedings{Nipkow1998,
	author = {Tobias Nipkow},
	editor = {Jim Grundy and Malcolm C. Newey},
	title = {Verified Lexical Analysis},
	booktitle = {Theorem Proving in Higher Order Logics, 11th International Conference, TPHOLs'98, Canberra, Australia, September 27 - October 1, 1998, Proceedings},
	series = {Lecture Notes in Computer Science},
	volume = {1479},
	pages = {1--15},
	publisher = {Springer},
	year = {1998},
	url = {https://doi.org/10.1007/BFb0055126},
	doi = {10.1007/BFb0055126},
	timestamp = {Tue, 14 May 2019 10:00:48 +0200},
	biburl = {https://dblp.org/rec/conf/tphol/Nipkow98.bib},
	bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{Verbatimpp,
	abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
	address = {New York, NY, USA},
	author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
	booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
	doi = {10.1145/3497775.3503694},
	isbn = {9781450391825},
	keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
	location = {Philadelphia, PA, USA},
	numpages = {13},
	pages = {27--39},
	publisher = {Association for Computing Machinery},
	series = {CPP 2022},
	title = {Verbatim++: Verified, Optimized, and Semantically Rich Lexing with Derivatives},
	url = {https://doi.org/10.1145/3497775.3503694},
	year = {2022},
	bdsk-url-1 = {https://doi.org/10.1145/3497775.3503694}}

@article{Turo_ov__2020,
	author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar},
	date-added = {2022-05-23 18:43:04 +0100},
	date-modified = {2022-05-23 18:43:04 +0100},