lexing: changeset 601:ce4e5151a836

--- a/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/Chapters/Bitcoded2.tex	Thu Sep 22 00:31:09 2022 +0100
@@ -178,7 +178,7 @@
 \begin{axis}[
     xlabel={$n$},
     ylabel={time},
-    ymode = log,
+    %ymode = log,
     legend entries={time in secs},  
     legend pos=north west,
     legend cell align=left]

--- a/ChengsongTanPhdThesis/Chapters/Finite.tex	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex	Thu Sep 22 00:31:09 2022 +0100
@@ -396,43 +396,47 @@
 
 \begin{center}
 	\begin{tabular}{@{}lcl@{}}
-$r \backslash_{rsimps} (c\!::\!s) $ & $\dn$ & $(r \backslash_{rsimp}\, c) \backslash_{rsimps}\, s$ \\
+$r \backslash_{rsimps} \; \; c\!::\!s $ & $\dn$ & $(r \backslash_{rsimp}\, c) \backslash_{rsimps}\, s$ \\
 $r \backslash_{rsimps} [\,] $ & $\dn$ & $r$
 	\end{tabular}
 \end{center}
 \noindent
-With $\rrexp$ the size caclulation of annotated regular expressions'
-simplification and derivatives can be done by the size of their unlifted 
-counterpart with the unlifted version of simplification and derivatives applied.
+We do not define an r-regular expression version of $\blexersimp$,
+as our proof does not involve its use. 
+Everything about the size of annotated regular expressions
+can be calculated via the size of r-regular expressions:
 \begin{lemma}\label{sizeRelations}
 	The following equalities hold:
 	\begin{itemize}
 		\item
-			$\asize{\bsimp{a}} = \rsize{\rsimp{\rerase{a}}}$
+			$\asize{\bsimps \; a} = \rsize{\rsimp{ \rerase{a}}}$
 		\item
 			$\asize{\bderssimp{a}{s}} =  \rsize{\rderssimp{\rerase{a}}{s}}$
 	\end{itemize}
 \end{lemma}
+\begin{proof}
+	The first part is by induction on the inductive cases
+	of $\textit{bsimp}$.
+	The second part is by induction on the string $s$,
+	where the inductive step follows from part one.
+\end{proof}
 \noindent
 With lemma \ref{sizeRelations},
-a bound on $\rsize{\rderssimp{\rerase{a}}{s}}$
-\[
-	\llbracket \rderssimp{a}{s} \rrbracket_r \leq N_r
-\]
-\noindent
-would apply to $\asize{\bderssimp{a}{s}}$ as well.
-\[
-	\llbracket a \backslash_{bsimps} s \rrbracket \leq N_r.
-\]
-In the following content, we will focus on $\rrexp$'s size bound.
-Whatever bounds we proved for $ \rsize{\rderssimp{\rerase{r}}{s}}$  
-will automatically apply to $\asize{\bderssimp{r}{s}}$.\\
-Unless stated otherwise in this chapter all regular expressions without
+we will be able to focus on 
+estimating only
+$\rsize{\rderssimp{\rerase{a}}{s}}$
+in later parts because
+\begin{center}
+	$\rsize{\rderssimp{\rerase{a}}{s}} \leq N_r \quad$
+	implies
+	$\quad \llbracket a \backslash_{bsimps} s \rrbracket \leq N_r$.
+\end{center}
+Unless stated otherwise in the rest of this 
+chapter all regular expressions without
 bitcodes are seen as $\rrexp$s.
-We also use $r_1 + r_2$ and $\RALTS{[r_1, r_2]}$ interchageably
-as the former suits people's intuitive way of stating a binary alternative
-regular expression.
-
+For the binary alternative r-regular expression $\RALTS{[r_1, r_2]}$,
+we use the notation $r_1 + r_2$
+for brevity.
 
 
 %-----------------------------------
@@ -473,6 +477,31 @@
 %\noindent
 %We explain in detail how we reached those claims.
 \subsection{Basic Properties needed for Closed Forms}
+If we attempt to prove 
+\begin{center}
+	$\forall r. \; \exists N_r.\;\; s.t. \llbracket r\backslash_{bsimps} s \rrbracket_r \leq N_r$
+\end{center}
+using a naive induction on the structure of $r$,
+then we are stuck at the inductive cases such as
+$r_1\cdot r_2$.
+The inductive hypotheses are:
+\begin{center}
+	1: $\text{for } r_1, \text{there exists } N_{r_1}.\;\; s.t. 
+	\;\;\forall s.  \llbracket r_1 \backslash_{bsimps} s \rrbracket_r \leq N_{r_1}. $\\
+	2: $\text{for } r_2, \text{there exists } N_{r_2}.\;\; s.t. 
+	\;\; \forall s. \llbracket r_2 \backslash_{bsimps} s \rrbracket_r \leq N_{r_2}. $
+\end{center}
+The inductive step to prove would be 
+\begin{center}
+	$\text{there exists } N_{r_1\cdot r_2}. \;\; s.t. \forall s. 
+	\llbracket (r_1 \cdot r_2) \backslash_{bsimps} s \rrbracket_r \leq N_{r_1\cdot r_2}.$
+\end{center}
+The problem is that it is not clear what 
+$(r_1\cdot r_2) \backslash_{bsimps} s$ looks like,
+and therefore $N_{r_1}$ and $N_{r_2}$ in the
+inductive hypotheses cannot be directly used.
+
+
 The next steps involves getting a closed form for
 $\rderssimp{r}{s}$ and then obtaining
 an estimate for the closed form.

--- a/ChengsongTanPhdThesis/Chapters/Inj.tex	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/Chapters/Inj.tex	Thu Sep 22 00:31:09 2022 +0100
@@ -516,7 +516,7 @@
 Assuming the string is given as a sequence of characters, say $c_0c_1..c_n$, 
 this algorithm presented graphically is as follows:
 
-\begin{equation}\label{graph:successive_ders}
+\begin{equation}\label{matcher}
 \begin{tikzcd}
 r_0 \arrow[r, "\backslash c_0"]  & r_1 \arrow[r, "\backslash c_1"] & 
 r_2 \arrow[r, dashed]  & r_n  \arrow[r,"\textit{nullable}?"] & 
@@ -535,13 +535,13 @@
 	lemma \ref{derDer}.
 \end{proof}
 \noindent
+\begin{figure}
 \begin{center}
-\begin{figure}
 \begin{tikzpicture}
 \begin{axis}[
     xlabel={$n$},
     ylabel={time in secs},
-    ymode = log,
+    %ymode = log,
     legend entries={Naive Matcher},  
     legend pos=north west,
     legend cell align=left]
@@ -551,8 +551,8 @@
 \caption{Matching the regular expression $(a^*)^*b$ against strings of the form
 $\protect\underbrace{aa\ldots a}_\text{n \textit{a}s}
 $ using Brzozowski's original algorithm}\label{NaiveMatcher}
+\end{center}
 \end{figure}
-\end{center} 
 \noindent
 If we implement the above algorithm naively, however,
 the algorithm can be excruciatingly slow, as shown in 
@@ -600,8 +600,8 @@
 \begin{axis}[
     xlabel={$n$},
     ylabel={time in secs},
-    ymode = log,
-    xmode = log,
+    %ymode = log,
+    %xmode = log,
     grid = both,
     legend entries={Matcher With Simp},  
     legend pos=north west,
@@ -702,6 +702,8 @@
 is to make sure that for a given pair of regular 
 expression $r$ and string $s$, the number of values 
 satisfying $|v| = s$ and $\vdash v:r$ is finite.
+This additional condition was
+imposed by Ausaf and Urban to make their proofs easier.
 Given the same string and regular expression, there can be
 multiple values for it. For example, both
 $\vdash \Seq(\Left \; ab)(\Right \; c):(ab+a)(bc+c)$ and
@@ -717,9 +719,8 @@
 The $\POSIX$ value $v$ for a regular expression
 $r$ and string $s$, denoted as $(s, r) \rightarrow v$, can be specified 
 in the following set of rules\footnote{The names of the rules are used
-as they were originally given in \cite{AusafDyckhoffUrban2016}}:
-\noindent
-\begin{figure}
+as they were originally given in \cite{AusafDyckhoffUrban2016} }:
+\begin{figure}[H]
 \begin{mathpar}
 	\inferrule[P1]{\mbox{}}{([], \ONE) \rightarrow \Empty}
 		
@@ -741,7 +742,13 @@
 		s_1@s_3 \in L \; r \land s_4 \in L \; r^*}{(s_1@s_2, r^*)\rightarrow \Stars \;
 	(v::vs)}
 \end{mathpar}
-\caption{POSIX Lexing Rules}
+\caption{The inductive POSIX Lexing Rules defined by Ausaf, Dyckhoff and Urban \cite{AusafDyckhoffUrban2016}.
+The ternary relation, written $(s, r) \rightarrow v$, formalises the POSIX constraints on the
+value $v$ given a string $s$ and 
+regular expression $r$.
+For example, this specification says that all matches for an alternative 
+must always prefer a left value to a right one.
+}
 \end{figure}
 \noindent
 
@@ -832,7 +839,7 @@
 When we have 
 \[
 	(s_1, r_1) \rightarrow v_1 \;\, and \;\, 
-	(s_2, r_2) \rightarrow v_2  \;\, and \;\,\\ 
+	(s_2, r_2) \rightarrow v_2  \;\, and \;\, 
 	\nexists s_3 \; s_4. s_3 \neq [] \land s_3 @ s_4 = s_2 \land 
 		s_1@ s_3 \in L \; r_1 \land s_4 \in L \; r_2
 \]
@@ -942,14 +949,25 @@
 of $s$ (i.e. $s_i = c_i \ldots c_{n-1}$ ) from the previous lexical value $v_{i+1}$.
 After injecting back $n$ characters, we get the lexical value for how $r_0$
 matches $s$. 
+\begin{figure}[H]
+\begin{center}	
 \begin{ceqn}
-\begin{equation}\label{graph:inj}
 \begin{tikzcd}
 r_0 \arrow[r, dashed] \arrow[d]& r_i \arrow[r, "\backslash c_i"]  \arrow[d]  & r_{i+1}  \arrow[r, dashed] \arrow[d]        & r_n \arrow[d, "mkeps" description] \\
 v_0           \arrow[u]                 & v_i  \arrow[l, dashed]                              & v_{i+1} \arrow[l,"inj_{r_i} c_i"]                 & v_n \arrow[l, dashed]         
 \end{tikzcd}
-\end{equation}
 \end{ceqn}
+\end{center}
+\caption{The two-phase lexing algorithm by Sulzmann and Lu \cite{AusafDyckhoffUrban2016},
+	matching the regular expression $r_0$ and string of the form $[c_0, c_1, \ldots, c_{n-1}]$.
+	The first phase involves taking successive derivatives w.r.t the characters $c_0$,
+	$c_1$, and so on. These are the same operations as they have appeared in the matcher
+	\ref{matcher}. When the final derivative regular expression is nullable (contains the empty string),
+	then the second phase starts. First $\mkeps$ generates a POSIX value which tells us how $r_n$ matches
+	the empty string , by always selecting the leftmost 
+	nullable regular expression. After that $\inj$ ``injects'' back the character in reverse order as they 
+	appeared in the string, always preserving POSIXness.}\label{graph:inj}
+\end{figure}
 \noindent
 $\textit{inj}$ takes three arguments: a regular
 expression ${r_{i}}$, before the character is chopped off, 
@@ -1188,7 +1206,7 @@
 even with simplification, which is not much better compared
 with the naive version without any simplifications:
 \begin{figure}[H]
-	\centering
+\begin{center}
 \begin{tikzpicture}
 \begin{axis}[
     xlabel={$n$},
@@ -1200,6 +1218,7 @@
 \addplot[blue,mark=*, mark options={fill=white}] table {BetterWaterloo1.data};
 \end{axis}
 \end{tikzpicture} 
+\end{center}
 \caption{Size of $(a^*\cdot a^*)^*$ against $\protect\underbrace{aa\ldots a}_\text{n \textit{a}s}$}
 \end{figure}\label{fig:BetterWaterloo}

--- a/ChengsongTanPhdThesis/Chapters/Introduction.tex	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/Chapters/Introduction.tex	Thu Sep 22 00:31:09 2022 +0100
@@ -151,7 +151,7 @@
 \def\zeroable{\textit{zeroable}}
 \def\nub{\textit{nub}}
 \def\filter{\textit{filter}}
-\def\not{\textit{not}}
+%\def\not{\textit{not}}
 
 
 
@@ -192,8 +192,32 @@
 
 %TODO: look up snort rules to use here--give readers idea of what regexes look like
 
-\begin{figure}
-\centering
+
+
+
+
+
+Regular expressions are widely used in computer science: 
+be it in text-editors \parencite{atomEditor} with syntax highlighting and auto-completion;
+command-line tools like $\mathit{grep}$ that facilitate easy 
+text-processing; network intrusion
+detection systems that reject suspicious traffic; or compiler
+front ends--the majority of the solutions to these tasks 
+involve lexing with regular 
+expressions.
+Given its usefulness and ubiquity, one would imagine that
+modern regular expression matching implementations
+are mature and fully studied.
+Indeed, in a popular programming language' regex engine, 
+supplying it with regular expressions and strings, one can
+get rich matching information in a very short time.
+Some network intrusion detection systems
+use regex engines that are able to process 
+megabytes or even gigabytes of data per second \parencite{Turo_ov__2020}.
+Unfortunately, this is not the case for $\mathbf{all}$ inputs.
+%TODO: get source for SNORT/BRO's regex matching engine/speed
+
+\begin{figure}[p]
 \begin{tabular}{@{}c@{\hspace{0mm}}c@{\hspace{0mm}}c@{}}
 \begin{tikzpicture}
 \begin{axis}[
@@ -257,36 +281,56 @@
 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
 \end{axis}
 \end{tikzpicture}\\
-\multicolumn{3}{c}{Graphs: Runtime for matching $(a^*)^*\,b$ with strings 
-           of the form $\underbrace{aa..a}_{n}$.}
+\begin{tikzpicture}
+\begin{axis}[
+    xlabel={$n$},
+    x label style={at={(1.05,-0.05)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=4cm, 
+    legend entries={Dart},  
+    legend pos=north west,
+    legend cell align=left]
+\addplot[green,mark=*, mark options={fill=white}] table {re-dart.data};
+\end{axis}
+\end{tikzpicture}
+  &
+\begin{tikzpicture}
+\begin{axis}[
+    xlabel={$n$},
+    x label style={at={(1.05,-0.05)}},
+    %ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=4cm, 
+    legend entries={Swift},  
+    legend pos=north west,
+    legend cell align=left]
+\addplot[purple,mark=*, mark options={fill=white}] table {re-swift.data};
+\end{axis}
+\end{tikzpicture}
+  & \\
+\multicolumn{3}{c}{Graphs}
 \end{tabular}    
-\caption{aStarStarb} \label{fig:aStarStarb}
-\end{figure}
-
-
-
-
-
-Regular expressions are widely used in computer science: 
-be it in text-editors \parencite{atomEditor} with syntax highlighting and auto-completion;
-command-line tools like $\mathit{grep}$ that facilitate easy 
-text-processing; network intrusion
-detection systems that reject suspicious traffic; or compiler
-front ends--the majority of the solutions to these tasks 
-involve lexing with regular 
-expressions.
-Given its usefulness and ubiquity, one would imagine that
-modern regular expression matching implementations
-are mature and fully studied.
-Indeed, in a popular programming language' regex engine, 
-supplying it with regular expressions and strings, one can
-get rich matching information in a very short time.
-Some network intrusion detection systems
-use regex engines that are able to process 
-megabytes or even gigabytes of data per second \parencite{Turo_ov__2020}.
-Unfortunately, this is not the case for $\mathbf{all}$ inputs.
-%TODO: get source for SNORT/BRO's regex matching engine/speed
-
+\caption{Graphs showing runtime for matching $(a^*)^*\,b$ with strings 
+           of the form $\protect\underbrace{aa..a}_{n}$ in various existing regular expression libraries.
+   The reason for their superlinear behaviour is that they do a depth-first-search.
+   If the string does not match, the engine starts to explore all possibilities. 
+}\label{fig:aStarStarb}
+\end{figure}\afterpage{\clearpage}
 
 Take $(a^*)^*\,b$ and ask whether
 strings of the form $aa..a$ match this regular
@@ -299,14 +343,12 @@
 This is clearly exponential behaviour, and 
 is triggered by some relatively simple regex patterns, as the graphs
  in \ref{fig:aStarStarb} show.
-
+Java 9 and newer
+versions improves this behaviour, but is still slow compared 
+with the approach we are going to use.
 
 
 
-\ChristianComment{Superlinear I just leave out the explanation 
-which I find once used would distract the flow. Plus if i just say exponential
-here the 2016 event in StackExchange was not exponential, but just quardratic so would be 
-in accurate}
 
 This superlinear blowup in regular expression engines
 had repeatedly caused grief in real life.
@@ -330,7 +372,6 @@
 had several causes, at the heart was a regular expression that
 was used to monitor network
 traffic.\footnote{\url{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}(Last accessed in 2022)}
-%TODO: data points for some new versions of languages
 These problems with regular expressions 
 are not isolated events that happen
 very occasionally, but actually widespread.
@@ -345,22 +386,29 @@
 requires
 more research attention.
 
+Regular expressions and regular expression matchers 
+have of course been studied for many, many years.
+One of the most recent work in the context of lexing
+is the Verbatim lexer by Egolf, Lasser and Fisher\cite{Verbatim}.
+This is relevant work and we will compare later on
+our derivative-based matcher we are going to present.
+There is also some newer work called
+Verbatim++\cite{Verbatimpp}, this does not use derivatives, but automaton instead.
+For that the problem is the bounded repetitions ($r^{n}$,
+$r^{\ldots m}$, $r^{n\ldots}$ and $r^{n\ldots m}$).
+They often occur in practical use, in for example the Snort XML definitions:
 
-But the problems are not limited to slowness on certain 
+
+The problems are not limited to slowness on certain 
 cases. 
 Another thing about these libraries is that there
 is no correctness guarantee.
 In some cases, they either fail to generate a lexing result when there exists a match,
 or give results that are inconsistent with the $\POSIX$ standard.
-A concrete example would be 
-the regex
-\begin{verbatim}
-(aba|ab|a)*
-\end{verbatim}
-and the string
-\begin{verbatim}
-ababa
-\end{verbatim}
+A concrete example would be the regex
+\begin{center}
+	$(aba + ab + a)* \text{and the string} ababa$
+\end{center}
 The correct $\POSIX$ match for the above would be 
 with the entire string $ababa$, 
 split into two Kleene star iterations, $[ab] [aba]$ at positions
@@ -375,12 +423,13 @@
 correctly implementing the POSIX (maximum-munch)
 rule of regular expression matching.
 
-As Grathwohl\parencite{grathwohl2014crash} commented,
-\begin{center}
-	``The POSIX strategy is more complicated than the greedy because of the dependence on information about the length of matched strings in the various subexpressions.''
-\end{center}
-
-
+As Grathwohl\parencite{grathwohl2014crash} wrote,
+\begin{quote}
+	The POSIX strategy is more complicated than the 
+	greedy because of the dependence on information about 
+	the length of matched strings in the various subexpressions.
+\end{quote}
+%\noindent
 To summarise the above, regular expressions are important.
 They are popular and programming languages' library functions
 for them are very fast on non-catastrophic cases.
@@ -424,7 +473,6 @@
 representing that regex $r$ is being operated on. 
 We represent the time
 it takes by $P_2(r, s)$.\\
-
 For $\mathit{DFA}$,
 we have $P_2(r, s) = O( |s| )$,
 because we take at most $|s|$ steps, 
@@ -435,13 +483,14 @@
 transition upon receiving an input symbol.
 But unfortunately in the  worst case
 $P_1(r) = O(exp^{|r|})$. An example will be given later. 
-
-
 For $\mathit{NFA}$s, we have $P_1(r) = O(|r|)$ if we do not unfold 
-expressions like $r^n$ into $\underbrace{r \cdots r}_{\text{n copies of r}}$.
+expressions like $r^n$ into 
+\[
+	\underbrace{r \cdots r}_{\text{n copies of r}}.
+\]
 The $P_2(r, s)$ is bounded by $|r|\cdot|s|$, if we do not backtrack.
 On the other hand, if backtracking is used, the worst-case time bound bloats
-to $|r| * 2^|s|$.
+to $|r| * 2^{|s|}$.
 %on the input
 %And when calculating the time complexity of the matching algorithm,
 %we are assuming that each input reading step requires constant time.
@@ -638,7 +687,6 @@
 %when the language is still regular).
  %TODO: test performance of Rust on (((((a*a*)b*)b){20})*)c  baabaabababaabaaaaaaaaababaaaababababaaaabaaabaaaaaabaabaabababaababaaaaaaaaababaaaababababaaaaaaaaaaaaac
  %TODO: verify the fact Rust does not allow 1000+ reps
-\ChristianComment{Comment required: Java 17 updated graphs? Is it ok to still use Java 8 graphs?}
 
 
 So we have practical implementations

--- a/ChengsongTanPhdThesis/example.bib	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/example.bib	Thu Sep 22 00:31:09 2022 +0100
@@ -5,8 +5,27 @@
 
 
 %% Saved with string encoding Unicode (UTF-8) 
+@INPROCEEDINGS{Verbatim,  author={Egolf, Derek and Lasser, Sam and Fisher, Kathleen},  booktitle={2021 IEEE Security and Privacy Workshops (SPW)},   title={Verbatim: A Verified Lexer Generator},   year={2021},  volume={},  number={},  pages={92-100},  doi={10.1109/SPW53761.2021.00022}}
 
 
+@inproceedings{Verbatimpp,
+author = {Egolf, Derek and Lasser, Sam and Fisher, Kathleen},
+title = {Verbatim++: Verified, Optimized, and Semantically Rich Lexing with Derivatives},
+year = {2022},
+isbn = {9781450391825},
+publisher = {Association for Computing Machinery},
+address = {New York, NY, USA},
+url = {https://doi.org/10.1145/3497775.3503694},
+doi = {10.1145/3497775.3503694},
+abstract = {Lexers and parsers are attractive targets for attackers because they often sit at the boundary between a software system's internals and the outside world. Formally verified lexers can reduce the attack surface of these systems, thus making them more secure. One recent step in this direction is the development of Verbatim, a verified lexer based on the concept of Brzozowski derivatives. Two limitations restrict the tool's usefulness. First, its running time is quadratic in the length of its input string. Second, the lexer produces tokens with a simple "tag and string" representation, which limits the tool's ability to integrate with parsers that operate on more expressive token representations. In this work, we present a suite of extensions to Verbatim that overcomes these limitations while preserving the tool's original correctness guarantees. The lexer achieves effectively linear performance on a JSON benchmark through a combination of optimizations that, to our knowledge, has not been previously verified. The enhanced version of Verbatim also enables users to augment their lexical specifications with custom semantic actions, and it uses these actions to produce semantically rich tokens---i.e., tokens that carry values with arbitrary, user-defined types. All extensions were implemented and verified with the Coq Proof Assistant.},
+booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
+pages = {27–39},
+numpages = {13},
+keywords = {Brzozowski derivatives, formal verification, lexical analysis, semantic actions},
+location = {Philadelphia, PA, USA},
+series = {CPP 2022}
+}
+
 
 @article{Turo_ov__2020,
 	author = {Lenka Turo{\v{n}}ov{\'{a}} and Luk{\'{a}}{\v{s}} Hol{\'{\i}}k and Ond{\v{r}}ej Leng{\'{a}}l and Olli Saarikivi and Margus Veanes and Tom{\'{a}}{\v{s}} Vojnar},

--- a/ChengsongTanPhdThesis/main.tex	Mon Sep 12 23:32:18 2022 +0200
+++ b/ChengsongTanPhdThesis/main.tex	Thu Sep 22 00:31:09 2022 +0100
@@ -62,6 +62,7 @@
 %style=authoryear, natbib=true 
 \usepackage{stmaryrd}
 \usepackage{caption}
+\usepackage{afterpage}
 
 \addbibresource{example.bib} % The filename of the bibliography
 
@@ -270,11 +271,11 @@
 %	QUOTATION PAGE
 %----------------------------------------------------------------------------------------
 
-\vspace*{0.2\textheight}
-
-\noindent\enquote{\itshape Thanks to my solid academic training, today I can write hundreds of words on virtually any topic without possessing a shred of information, which is how I got a good job in journalism.}\bigbreak
-
-\hfill Dave Barry
+%\vspace*{0.2\textheight}
+%
+%\noindent\enquote{\itshape Thanks to my solid academic training, today I can write hundreds of words on virtually any topic without possessing a shred of information, which is how I got a good job in journalism.}\bigbreak
+%
+%\hfill Dave Barry
 
 %----------------------------------------------------------------------------------------
 %	ABSTRACT PAGE
@@ -333,7 +334,6 @@
 %	ABBREVIATIONS
 %----------------------------------------------------------------------------------------
 
-\begin{abbreviations}{ll} % Include a list of abbreviations (a table of two columns)
 
 
 \newtheorem{theorem}{Theorem}
@@ -346,66 +346,65 @@
 %proof
 
 
-\newcommand\sflat[1][]{\textit{sflat} \, #1}
-\newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3}
-\newcommand{\bderssimp}[2]{#1 \backslash_{bsimp} #2}
-\newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2}
-\newcommand{\sflataux}[1]{\lbracket #1 \rbracket}
-\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
-\newcommand{\ZERO}{\mbox{\bf 0}}
-\newcommand{\ONE}{\mbox{\bf 1}}
-\newcommand{\AALTS}[2]{\XOR {\scriptstyle #1}\, #2}
-
-\def\lexer{\mathit{lexer}}
-\def\mkeps{\mathit{mkeps}}
-
-\def\AZERO{\textit{AZERO}}
-\def\AONE{\textit{AONE}}
-\def\ACHAR{\textit{ACHAR}}
+%\newcommand\sflat[1][]{\textit{sflat} \, #1}
+%\newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3}
+%\newcommand{\bderssimp}[2]{#1 \backslash_{bsimp} #2}
+%\newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2}
+%\newcommand{\sflataux}[1]{\lbracket #1 \rbracket}
+%\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
+%\newcommand{\ZERO}{\mbox{\bf 0}}
+%\newcommand{\ONE}{\mbox{\bf 1}}
+%\newcommand{\AALTS}[2]{\XOR {\scriptstyle #1}\, #2}
+%
+%\def\lexer{\mathit{lexer}}
+%\def\mkeps{\mathit{mkeps}}
+%
+%\def\AZERO{\textit{AZERO}}
+%\def\AONE{\textit{AONE}}
+%\def\ACHAR{\textit{ACHAR}}
+%
+%
+%\def\ALTS{\textit{ALTS}}
+%\def\ASTAR{\textit{ASTAR}}
+%\def\DFA{\textit{DFA}}
+%\def\bmkeps{\textit{bmkeps}}
+%\def\retrieve{\textit{retrieve}}
+%\def\blexer{\textit{blexer}}
+%\def\flex{\textit{flex}}
+%\def\inj{\mathit{inj}}
+%\def\Empty{\mathit{Empty}}
+%\def\Left{\mathit{Left}}
+%\def\Right{\mathit{Right}}
+%\def\Stars{\mathit{Stars}}
+%\def\Char{\mathit{Char}}
+%\def\Seq{\mathit{Seq}}
+%\def\Der{\mathit{Der}}
+%\def\nullable{\mathit{nullable}}
+%\def\Z{\mathit{Z}}
+%\def\S{\mathit{S}}
+%\def\rup{r^\uparrow}
+%%\def\bderssimp{\mathit{bders}\_\mathit{simp}}
+%\def\distinctWith{\textit{distinctWith}}
+%
+%\def\rexp{\mathbf{rexp}}
+%\def\simp{\mathit{simp}}
+%\def\simpALTs{\mathit{simp}\_\mathit{ALTs}}
+%\def\map{\mathit{map}}
+%\def\distinct{\mathit{distinct}}
+%\def\blexersimp{\mathit{blexer}\_\mathit{simp}}
+%\def\map{\textit{map}}
+%\def\vsuf{\textit{vsuf}}
+%\def\sflataux{\textit{sflat}\_\textit{aux}}
+%\def\rrexp{\textit{rrexp}}
+%\def\rsize{\textit{rsize}}
+%\def\asize{\textit{asize}}
+%\def\rerase{\textit{rerase}}
+%\def\erase{\textit{erase}}
+%\def\STAR{\textit{STAR}}
+%\def\flts{\textit{flts}}
+%
 
 
-\def\ALTS{\textit{ALTS}}
-\def\ASTAR{\textit{ASTAR}}
-\def\DFA{\textit{DFA}}
-\def\bmkeps{\textit{bmkeps}}
-\def\retrieve{\textit{retrieve}}
-\def\blexer{\textit{blexer}}
-\def\flex{\textit{flex}}
-\def\inj{\mathit{inj}}
-\def\Empty{\mathit{Empty}}
-\def\Left{\mathit{Left}}
-\def\Right{\mathit{Right}}
-\def\Stars{\mathit{Stars}}
-\def\Char{\mathit{Char}}
-\def\Seq{\mathit{Seq}}
-\def\Der{\mathit{Der}}
-\def\nullable{\mathit{nullable}}
-\def\Z{\mathit{Z}}
-\def\S{\mathit{S}}
-\def\rup{r^\uparrow}
-%\def\bderssimp{\mathit{bders}\_\mathit{simp}}
-\def\distinctWith{\textit{distinctWith}}
-
-\def\rexp{\mathbf{rexp}}
-\def\simp{\mathit{simp}}
-\def\simpALTs{\mathit{simp}\_\mathit{ALTs}}
-\def\map{\mathit{map}}
-\def\distinct{\mathit{distinct}}
-\def\blexersimp{\mathit{blexer}\_\mathit{simp}}
-\def\map{\textit{map}}
-\def\vsuf{\textit{vsuf}}
-\def\sflataux{\textit{sflat}\_\textit{aux}}
-\def\rrexp{\textit{rrexp}}
-\def\rsize{\textit{rsize}}
-\def\asize{\textit{asize}}
-\def\rerase{\textit{rerase}}
-\def\erase{\textit{erase}}
-\def\STAR{\textit{STAR}}
-\def\flts{\textit{flts}}
-
-
-
-\end{abbreviations}

--- a/thys4/posix/FBound.thy	Mon Sep 12 23:32:18 2022 +0200
+++ b/thys4/posix/FBound.thy	Thu Sep 22 00:31:09 2022 +0100
@@ -619,7 +619,8 @@
   by (simp add: scomp_size_reduction)
   
 lemma leq1_eq1_equal:
-  shows "\<lbrakk>r1 \<le>1 r2;
+  shows "\<lbrakk>r1 \<le>1 r2; bsimp r2 = r1 \<rbrakk> \<Longrightarrow> r1 = r2 \<or> s_complexity r1 < s_complexity r2"
+  sorry

author	Chengsong
	Thu, 22 Sep 2022 00:31:09 +0100
changeset 601	ce4e5151a836
parent 600	fd068f39ac23
child 602	46db6ae66448

ChengsongTanPhdThesis/Chapters/Bitcoded2.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/Chapters/Finite.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/Chapters/Inj.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/Chapters/Introduction.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/example.bib		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/main.tex		file \| annotate \| diff \| comparison \| revisions
thys4/posix/FBound.thy		file \| annotate \| diff \| comparison \| revisions