231 volume = {410}, |
231 volume = {410}, |
232 year = {2009}, |
232 year = {2009}, |
233 bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397509001789}, |
233 bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S0304397509001789}, |
234 bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2009.02.022}} |
234 bdsk-url-2 = {https://doi.org/10.1016/j.tcs.2009.02.022}} |
235 |
235 |
|
236 |
|
237 @incollection{Aho1990, |
|
238 title = {CHAPTER 5 - Algorithms for Finding Patterns in Strings}, |
|
239 editor = {JAN {VAN LEEUWEN}}, |
|
240 booktitle = {Algorithms and Complexity}, |
|
241 publisher = {Elsevier}, |
|
242 address = {Amsterdam}, |
|
243 pages = {255-300}, |
|
244 year = {1990}, |
|
245 series = {Handbook of Theoretical Computer Science}, |
|
246 isbn = {978-0-444-88071-0}, |
|
247 doi = {https://doi.org/10.1016/B978-0-444-88071-0.50010-2}, |
|
248 url = {https://www.sciencedirect.com/science/article/pii/B9780444880710500102}, |
|
249 author = {A.V.~Aho}, |
|
250 abstract = {Publisher Summary |
|
251 This chapter discusses the algorithms for solving string-matching problems that have proven useful for text-editing and text-processing applications. String pattern matching is an important problem that occurs in many areas of science and information processing. In computing, it occurs naturally as part of data processing, text editing, term rewriting, lexical analysis, and information retrieval. Many text editors and programming languages have facilities for matching strings. In biology, string-matching problems arise in the analysis of nucleic acids and protein sequences, and in the investigation of molecular phylogeny. String matching is also one of the central and most widely studied problems in theoretical computer science. The simplest form of the problem is to locate an occurrence of a keyword as a substring in a sequence of characters, which is called the input string. For example, the input string queueing contains the keyword ueuei as a substring. Even for this problem, several innovative, theoretically interesting algorithms have been devised that run significantly faster than the obvious brute-force method.} |
|
252 } |
|
253 |
236 %% back-references-------------------- |
254 %% back-references-------------------- |
237 %% ------------------------------------- |
255 %% ------------------------------------- |
|
256 |
|
257 |
|
258 |
|
259 %%---------------------------------------------------------------- |
|
260 %%----------------------------------------zippers |
|
261 @article{10.1145/3408990, |
|
262 author = {Darragh, Pierce and Adams, Michael D.}, |
|
263 title = {Parsing with Zippers (Functional Pearl)}, |
|
264 year = {2020}, |
|
265 issue_date = {August 2020}, |
|
266 publisher = {Association for Computing Machinery}, |
|
267 address = {New York, NY, USA}, |
|
268 volume = {4}, |
|
269 number = {ICFP}, |
|
270 url = {https://doi.org/10.1145/3408990}, |
|
271 doi = {10.1145/3408990}, |
|
272 abstract = {Parsing with Derivatives (PwD) is an elegant approach to parsing context-free grammars (CFGs). It takes the equational theory behind Brzozowski's derivative for regular expressions and augments that theory with laziness, memoization, and fixed points. The result is a simple parser for arbitrary CFGs. Although recent work improved the performance of PwD, it remains inefficient due to the algorithm repeatedly traversing some parts of the grammar. In this functional pearl, we show how to avoid this inefficiency by suspending the state of the traversal in a zipper. When subsequent derivatives are taken, we can resume the traversal from where we left off without retraversing already traversed parts of the grammar. However, the original zipper is designed for use with trees, and we want to parse CFGs. CFGs can include shared regions, cycles, and choices between alternates, which makes them incompatible with the traditional tree model for zippers. This paper develops a generalization of zippers to properly handle these additional features. Just as PwD generalized Brzozowski's derivatives from regular expressions to CFGs, we generalize Huet's zippers from trees to CFGs. Abstract The resulting parsing algorithm is concise and efficient: it takes only 31 lines of OCaml code to implement the derivative function but performs 6,500 times faster than the original PwD and 3.24 times faster than the optimized implementation of PwD.}, |
|
273 journal = {Proc. ACM Program. Lang.}, |
|
274 month = {aug}, |
|
275 articleno = {108}, |
|
276 numpages = {28}, |
|
277 keywords = {Derivatives, Zippers, Parsing with Derivatives, Parsing} |
|
278 } |
|
279 |
|
280 |
|
281 |
|
282 @article{Edelmann:287059, |
|
283 title = {Efficient Parsing with Derivatives and Zippers}, |
|
284 author = {Edelmann, Romain}, |
|
285 institution = {IINFCOM}, |
|
286 publisher = {EPFL}, |
|
287 address = {Lausanne}, |
|
288 pages = {246}, |
|
289 year = {2021}, |
|
290 abstract = {Parsing is the process that enables a computer system to make sense of raw data. Parsing is common to almost all computer systems: It is involved every time sequential data is read and elaborated into structured data. The theory of parsing usually focuses on the binary recognition aspect of parsing and eschews this essential data-elaboration aspect. In this thesis, I present a declarative framework for value-aware parsing that explicitly integrates data elaboration. |
|
291 Within the framework of the thesis, I present parsing algorithms that are based on the concept of Brzozowski's derivatives. Derivative-based parsing algorithms present several advantages: they are elegant, amenable to formal reasoning, and easy to implement. Unfortunately, the performance of these algorithms in practice is often not competitive with other approaches. In this thesis, I show a general technique inspired by Huet's Zipper to greatly enhance the performance of derivative-based algorithms, and I do so without compromising their elegance, amenability to formal reasoning, or ease of implementation. |
|
292 First, I present a technique for building efficient tokenisers that is based on Brzozowski's derivatives and Huet's zipper and that does not require the usual burdensome explicit conversion to automata. I prove the technique is correct in Coq and present SILEX, a Scala lexing library based on the technique. I demonstrate that the approach is competitive with state-of-the-art solutions. |
|
293 Then, I present a characterisation of LL(1) languages based on the concept of should-not-follow sets. I present an algorithm for parsing LL(1) languages with derivatives and zippers. I show a formal proof of the algorithm's correctness and prove its worst-case linear-time complexity. I show how the LL(1) parsing with derivatives and zippers algorithm corresponds to the traditional LL(1) parsing algorithm. |
|
294 I then present SCALL1ON, a Scala parsing combinators library for LL(1) languages that incorporates the LL(1) parsing with derivatives and zippers algorithm. I present an expressive and familiar combinator-based interface for describing LL(1) languages. I present techniques that help precisely locate LL(1) conflicts in user code. I discuss several advantages of the parsing with derivatives approach within the context of a parsing library. I also present SCALL1ON's enumeration and pretty-printing features and discuss their implementation. Through a series of benchmarks, I demonstrate the good performance and practicality of the approach. Finally, I present how to adapt the LL(1) parsing with derivatives and zippers algorithm to support arbitrary context-free languages. I show how the adapted algorithm corresponds to general parsing algorithms, such as Earley's parsing algorithm.}, |
|
295 url = {http://infoscience.epfl.ch/record/287059}, |
|
296 doi = {10.5075/epfl-thesis-7357}, |
|
297 } |
|
298 |
|
299 @inproceedings{Zippy2020, |
|
300 author = {Edelmann, Romain and Hamza, Jad and Kun\v{c}ak, Viktor}, |
|
301 title = {Zippy LL(1) Parsing with Derivatives}, |
|
302 year = {2020}, |
|
303 isbn = {9781450376136}, |
|
304 publisher = {Association for Computing Machinery}, |
|
305 address = {New York, NY, USA}, |
|
306 url = {https://doi.org/10.1145/3385412.3385992}, |
|
307 doi = {10.1145/3385412.3385992}, |
|
308 abstract = {In this paper, we present an efficient, functional, and formally verified parsing algorithm for LL(1) context-free expressions based on the concept of derivatives of formal languages. Parsing with derivatives is an elegant parsing technique, which, in the general case, suffers from cubic worst-case time complexity and slow performance in practice. We specialise the parsing with derivatives algorithm to LL(1) context-free expressions, where alternatives can be chosen given a single token of lookahead. We formalise the notion of LL(1) expressions and show how to efficiently check the LL(1) property. Next, we present a novel linear-time parsing with derivatives algorithm for LL(1) expressions operating on a zipper-inspired data structure. We prove the algorithm correct in Coq and present an implementation as a part of Scallion, a parser combinators framework in Scala with enumeration and pretty printing capabilities.}, |
|
309 booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
|
310 pages = {1036–1051}, |
|
311 numpages = {16}, |
|
312 keywords = {Parsing, Zipper, Formal proof, LL(1), Derivatives}, |
|
313 location = {London, UK}, |
|
314 series = {PLDI 2020} |
|
315 } |
|
316 |
|
317 |
|
318 |
|
319 %%----------------------------------------zippers |
|
320 %%---------------------------------------------------------------- |
|
321 |
238 |
322 |
239 @article{fowler2003, |
323 @article{fowler2003, |
240 title={An interpretation of the POSIX regex standard}, |
324 title={An interpretation of the POSIX regex standard}, |
241 author={Fowler, Glenn}, |
325 author={Fowler, Glenn}, |
242 journal={URL: https://web. archive. org/web/20050408073627/http://www. research. att. com/\~{} gsf/testregex/re-interpretation. html}, |
326 journal={URL: https://web. archive. org/web/20050408073627/http://www. research. att. com/\~{} gsf/testregex/re-interpretation. html}, |