(*<*)
theory PaperExt
imports
  (*"../LexerExt"*)
  (*"../PositionsExt"*)
  "~~/src/HOL/Library/LaTeXsugar"
begin
(*>*)

(*
declare [[show_question_marks = false]]
declare [[eta_contract = false]]

abbreviation
  "der_syn r c \<equiv> der c r"

abbreviation
  "ders_syn r s \<equiv> ders s r"


notation (latex output)
  If ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and
  Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) and

  ZERO ("\<^bold>0" 81) and
  ONE ("\<^bold>1" 81) and
  CHAR ("_" [1000] 80) and
  ALT ("_ + _" [77,77] 78) and
  SEQ ("_ \<cdot> _" [77,77] 78) and
  STAR ("_\<^sup>\<star>" [78] 78) and
  NTIMES ("_\<^bsup>'{_'}\<^esup>" [78, 50] 80) and
  FROMNTIMES ("_\<^bsup>'{_..'}\<^esup>" [78, 50] 80) and
  UPNTIMES ("_\<^bsup>'{.._'}\<^esup>" [78, 50] 80) and
  NMTIMES ("_\<^bsup>'{_.._'}\<^esup>" [78, 50, 50] 80) and

  val.Void ("Empty" 78) and
  val.Char ("Char _" [1000] 78) and
  val.Left ("Left _" [79] 78) and
  val.Right ("Right _" [1000] 78) and
  val.Seq ("Seq _ _" [79,79] 78) and
  val.Stars ("Stars _" [1000] 78) and

  L ("L'(_')" [10] 78) and
  LV ("LV _ _" [80,73] 78) and
  der_syn ("_\\_" [79, 1000] 76) and
  ders_syn ("_\\_" [79, 1000] 76) and
  flat ("|_|" [75] 74) and
  flats ("|_|" [72] 74) and
  Sequ ("_ @ _" [78,77] 63) and
  injval ("inj _ _ _" [81,77,79] 76) and
  mkeps ("mkeps _" [79] 76) and
  length ("len _" [73] 73) and
  intlen ("len _" [73] 73) and
  set ("_" [73] 73) and

  Prf ("_ : _" [75,75] 75) and
  Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and

  lexer ("lexer _ _" [78,78] 77) and
  DUMMY ("\<^latex>\<open>\\underline{\\hspace{2mm}}\<close>")
*)
(*>*)

(*
section {* Extensions *}

text {*

  A strong point in favour of
  Sulzmann and Lu's algorithm is that it can be extended in various
  ways. If we are interested in tokenising a string, then we not only need
  to split up the string into tokens, but also to ``classify'' the tokens
  (for example as keywords or identifiers). This can be done with
  only minor modifications to the algorithm by introducing \emph{record
  regular expressions} and \emph{record values} (see for example
  \cite{Sulzmann2014b}):

  \begin{center}
  @{text "r :="}
  @{text "..."} $\mid$
  @{text "(l : r)"} \qquad\qquad
  @{text "v :="}
  @{text "..."} $\mid$
  @{text "(l : v)"}
  \end{center}

  \noindent where @{text l} is a label, say a string, @{text r} a regular
  expression and @{text v} a value. All functions can be smoothly extended
  to these regular expressions and values. For example, \mbox{@{text "(l :
  r)"}} is nullable iff @{term r} is, and so on. The purpose of a record
  regular expression is to mark certain parts of a regular expression and
  then record in the calculated value which parts of the string were matched
  by them. The labels can then serve as classifications of the tokens.
  For this recall the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} for
  keywords and identifiers from the Introduction. With record regular
  expressions we can form \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\<star>"}}
  and then traverse the calculated value, collecting only the underlying
  strings in record values. With this we obtain finite sequences of pairs of
  labels and strings, for example

  \[@{text "(l\<^sub>1 : s\<^sub>1), ..., (l\<^sub>n : s\<^sub>n)"}\]

  \noindent from which tokens with classifications (keyword-token,
  identifier-token and so on) can be extracted.
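
  To make this concrete, here is a minimal sketch in Scala (not part of our
  formalisation); the constructors @{text REC} and @{text Rec} and the
  functions @{text flatten} and @{text env} are our own illustrative names
  for record regular expressions, record values, the flattening of values
  and the token-collecting traversal:

  \begin{verbatim}
  // record regular expressions and record values (a sketch)
  abstract class Rexp
  case object ZERO extends Rexp
  case object ONE extends Rexp
  case class CHAR(c: Char) extends Rexp
  case class ALT(r1: Rexp, r2: Rexp) extends Rexp
  case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
  case class STAR(r: Rexp) extends Rexp
  case class REC(l: String, r: Rexp) extends Rexp // (l : r)

  abstract class Val
  case object Empty extends Val
  case class Chr(c: Char) extends Val
  case class Left(v: Val) extends Val
  case class Right(v: Val) extends Val
  case class Sequ(v1: Val, v2: Val) extends Val
  case class Stars(vs: List[Val]) extends Val
  case class Rec(l: String, v: Val) extends Val   // (l : v)

  // the underlying string of a value
  def flatten(v: Val): String = v match {
    case Empty        => ""
    case Chr(c)       => c.toString
    case Left(v1)     => flatten(v1)
    case Right(v1)    => flatten(v1)
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
    case Stars(vs)    => vs.map(flatten).mkString
    case Rec(_, v1)   => flatten(v1)
  }

  // collect the label/string pairs stored in record values
  def env(v: Val): List[(String, String)] = v match {
    case Empty | Chr(_) => Nil
    case Left(v1)       => env(v1)
    case Right(v1)      => env(v1)
    case Sequ(v1, v2)   => env(v1) ::: env(v2)
    case Stars(vs)      => vs.flatMap(env)
    case Rec(l, v1)     => (l, flatten(v1)) :: env(v1)
  }
  \end{verbatim}

  \noindent Applied to the value calculated for
  \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\<star>"}},
  @{text env} yields exactly the label/string pairs described above.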

  In the context of POSIX matching, it is also interesting to study additional
  constructors for bounded repetitions of regular expressions. For this let
  us extend the results from the previous section to the following four
  additional regular expression constructors:

  \begin{center}
  \begin{tabular}{lcrl@ {\hspace{12mm}}l}
  @{text r} & @{text ":="} & $\ldots\mid$ & @{term "NTIMES r n"} & exactly-@{text n}-times\\
            &              & $\mid$       & @{term "UPNTIMES r n"} & upto-@{text n}-times\\
            &              & $\mid$       & @{term "FROMNTIMES r n"} & from-@{text n}-times\\
            &              & $\mid$       & @{term "NMTIMES r n m"} & between-@{text n}-and-@{text m}-times\\
  \end{tabular}
  \end{center}

  \noindent
  We will call them \emph{bounded regular expressions}.
  With the help of the power operator for sets of strings (definition given
  below), the languages recognised by these regular expressions can be
  defined in Isabelle as follows:

  \begin{center}
  \begin{tabular}{lcl}
  @{thm (lhs) L.simps(8)} & $\dn$ & @{thm (rhs) L.simps(8)}\\
  @{thm (lhs) L.simps(7)} & $\dn$ & @{thm (rhs) L.simps(7)}\\
  @{thm (lhs) L.simps(9)} & $\dn$ & @{thm (rhs) L.simps(9)}\\
  @{thm (lhs) L.simps(10)} & $\dn$ & @{thm (rhs) L.simps(10)}\\
  \end{tabular}
  \end{center}
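
  \noindent For completeness, the power operator for sets of strings is the
  standard one; a version of its definition (our rendering, not the exact
  statement in the formalisation) is

  \[
  A^{0} \;\dn\; \{[]\} \qquad\qquad A^{n+1} \;\dn\; A \,@\, A^{n}
  \]

  \noindent where @{text "@"} is language concatenation.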

  \noindent This definition implies that in the last clause @{term
  "NMTIMES r n m"} matches no string in case @{term "m < n"}, because
  then the interval @{term "{n..m}"} is empty. While the languages
  recognised by these regular expressions are straightforward, some
  care is needed in defining the corresponding lexical
  values. First, with a slight abuse of language, we will (re)use
  values of the form @{term "Stars vs"} for values inhabited in
  bounded regular expressions. Second, we need to introduce inductive
  rules for extending our inhabitation relation shown on
  Page~\ref{prfintros}, from which we then derived our notion of
  lexical values. Given the rule for @{term "STAR r"}, the rule for
  @{term "UPNTIMES r n"} just requires additionally that the length of
  the list of values is at most @{term n}:

  \begin{center}
  @{thm[mode=Rule] Prf.intros(7)[of "vs"]}
  \end{center}

  \noindent As in the @{term "STAR r"}-rule, we require with the
  left premise that some non-empty part of the string is `chipped'
  away by \emph{every} value in @{text vs}, that is, the corresponding
  values do not flatten to the empty string. In the rule for @{term
  "NTIMES r n"} (that is exactly-@{term n}-times @{text r}) we clearly need
  to require that the length of the list of values equals @{text n}. But
  enforcing that each of these @{term n} values `chips' away some part of a
  string would be too strong. The rule below therefore splits the list of
  values into two parts: the values in @{text "vs\<^sub>1"} flatten to non-empty
  strings, the values in @{text "vs\<^sub>2"} flatten to the empty string, and
  only their combined length needs to equal @{text n}:

  \begin{center}
  @{thm[mode=Rule] Prf.intros(8)[of "vs\<^sub>1" r "vs\<^sub>2"]}
  \end{center}

  \noindent The derivative operation extends to the bounded regular
  expressions as follows:

  \begin{center}
  \begin{tabular}{lcl}
  @{thm (lhs) der.simps(8)} & $\dn$ & @{thm (rhs) der.simps(8)}\\
  @{thm (lhs) der.simps(7)} & $\dn$ & @{thm (rhs) der.simps(7)}\\
  @{thm (lhs) der.simps(9)} & $\dn$ & @{thm (rhs) der.simps(9)}\\
  @{thm (lhs) der.simps(10)} & $\dn$ & @{thm (rhs) der.simps(10)}\\
  \end{tabular}
  \end{center}
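
  \noindent For orientation, one standard way to implement such derivative
  clauses in a functional language is sketched below (continuing the earlier
  Scala sketch; this is our rendering under the usual definitions of bounded
  repetition, not necessarily identical to the formalised clauses):

  \begin{verbatim}
  case class NTIMES(r: Rexp, n: Int) extends Rexp           // r{n}
  case class UPNTIMES(r: Rexp, n: Int) extends Rexp         // r{..n}
  case class FROMNTIMES(r: Rexp, n: Int) extends Rexp       // r{n..}
  case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp  // r{n..m}

  def der(c: Char, r: Rexp): Rexp = r match {
    // ... clauses for the basic constructors omitted ...
    case NTIMES(r1, n) =>     // r{0} matches only []; no derivative
      if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1))
    case UPNTIMES(r1, n) =>
      if (n == 0) ZERO else SEQ(der(c, r1), UPNTIMES(r1, n - 1))
    case FROMNTIMES(r1, n) => // r{0..} behaves like r*
      if (n == 0) SEQ(der(c, r1), STAR(r1))
      else SEQ(der(c, r1), FROMNTIMES(r1, n - 1))
    case NMTIMES(r1, n, m) => // empty language if the interval is empty
      if (m == 0 || m < n) ZERO
      else if (n == 0) SEQ(der(c, r1), UPNTIMES(r1, m - 1))
      else SEQ(der(c, r1), NMTIMES(r1, n - 1, m - 1))
  }
  \end{verbatim}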

  \noindent Similarly, the @{text mkeps} function is extended to the bounded
  regular expressions:

  \begin{center}
  \begin{tabular}{lcl}
  @{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)}\\
  @{thm (lhs) mkeps.simps(6)} & $\dn$ & @{thm (rhs) mkeps.simps(6)}\\
  @{thm (lhs) mkeps.simps(7)} & $\dn$ & @{thm (rhs) mkeps.simps(7)}\\
  @{thm (lhs) mkeps.simps(8)} & $\dn$ & @{thm (rhs) mkeps.simps(8)}\\
  \end{tabular}
  \end{center}
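
  \noindent A natural implementation (again our sketch, not the formalised
  clauses) builds a @{text Stars}-value with the minimal number of copies of
  @{text "mkeps r"} needed to satisfy the lower bound:

  \begin{verbatim}
  def mkeps(r: Rexp): Val = r match {
    // ... clauses for the basic constructors omitted ...
    case NTIMES(r1, n)     => Stars(List.fill(n)(mkeps(r1)))
    case UPNTIMES(_, _)    => Stars(Nil)
    case FROMNTIMES(r1, n) => Stars(List.fill(n)(mkeps(r1)))
    case NMTIMES(r1, n, _) => Stars(List.fill(n)(mkeps(r1)))
  }
  \end{verbatim}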

  \noindent Finally, the injection function @{text inj} is extended as
  follows:

  \begin{center}
  \begin{tabular}{lcl}
  @{thm (lhs) injval.simps(8)} & $\dn$ & @{thm (rhs) injval.simps(8)}\\
  @{thm (lhs) injval.simps(9)} & $\dn$ & @{thm (rhs) injval.simps(9)}\\
  @{thm (lhs) injval.simps(10)} & $\dn$ & @{thm (rhs) injval.simps(10)}\\
  @{thm (lhs) injval.simps(11)} & $\dn$ & @{thm (rhs) injval.simps(11)}\\
  \end{tabular}
  \end{center}
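
  \noindent Since the derivatives of all four bounded constructors have the
  shape of a sequence followed by a (smaller) bounded repetition, the
  injection clauses all mirror the @{term "STAR r"}-clause (our sketch):

  \begin{verbatim}
  def inj(r: Rexp, c: Char, v: Val): Val = (r, v) match {
    // ... clauses for the basic constructors omitted ...
    case (NTIMES(r1, _), Sequ(v1, Stars(vs)))     => Stars(inj(r1, c, v1) :: vs)
    case (UPNTIMES(r1, _), Sequ(v1, Stars(vs)))   => Stars(inj(r1, c, v1) :: vs)
    case (FROMNTIMES(r1, _), Sequ(v1, Stars(vs))) => Stars(inj(r1, c, v1) :: vs)
    case (NMTIMES(r1, _, _), Sequ(v1, Stars(vs))) => Stars(inj(r1, c, v1) :: vs)
  }
  \end{verbatim}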

  \noindent Our POSIX relation is extended to the bounded regular
  expressions by the following rules:

  \begin{center}
  @{thm [mode=Rule] Posix_NTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_NTIMES2}\medskip

  @{thm [mode=Rule] Posix_UPNTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_UPNTIMES2}\medskip

  @{thm [mode=Rule] Posix_FROMNTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_FROMNTIMES3[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_FROMNTIMES2}\medskip

  @{thm [mode=Rule] Posix_NMTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_NMTIMES3[of "s\<^sub>1" r v "s\<^sub>2"]}\medskip

  @{thm [mode=Rule] Posix_NMTIMES2}
  \end{center}

  @{term "\<Sum> i \<in> {m..n} . P i"} \qquad @{term "\<Sum> i \<in> {..n} . P i"}

  @{term "\<Union> i \<in> {m..n} . P i"} \qquad @{term "\<Union> i \<in> {..n} . P i"}

  @{term "\<Union> i \<in> {0::nat..n} . P i"}

*}



section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}

text {*
  %  \newcommand{\greedy}{\succcurlyeq_{gr}}
  \newcommand{\posix}{>}

  An extended version of \cite{Sulzmann2014} is available at the website of
  its first author; this includes some ``proofs'', claimed in
  \cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in
  final form, we make no comment thereon, preferring to give general reasons
  for our belief that the approach of \cite{Sulzmann2014} is problematic.
  Their central definition is an ``ordering relation'' defined by the
  rules (slightly adapted to fit our notation):

  ??

  \noindent The idea behind rules (A1) and (A2), for example, is that a
  @{text Left}-value is bigger than a @{text Right}-value, if the underlying
  string of the @{text Left}-value is longer than or of equal length to the
  underlying string of the @{text Right}-value. The order is reversed,
  however, if the @{text Right}-value can match a longer string than the
  @{text Left}-value. In this way the POSIX value is supposed to be the
  biggest value for a given string and regular expression.
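
  For illustration, rules in the spirit of (A1) and (A2) can be rendered as
  follows (our reconstruction from the description above, not a verbatim
  quotation of \cite{Sulzmann2014}):

  \[
  \frac{\mathit{len}\,|v_1| \geq \mathit{len}\,|v_2|}
       {\mathit{Left}\,v_1 \posix \mathit{Right}\,v_2}\;(A1)
  \qquad
  \frac{\mathit{len}\,|v_2| > \mathit{len}\,|v_1|}
       {\mathit{Right}\,v_2 \posix \mathit{Left}\,v_1}\;(A2)
  \]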

  Sulzmann and Lu explicitly refer to the paper \cite{Frisch2004} by Frisch
  and Cardelli from which they have taken the idea for their correctness
  proof. Frisch and Cardelli introduced a similar ordering for GREEDY
  matching and showed that their GREEDY matching algorithm always
  produces a maximal element according to this ordering (from all possible
  solutions). The only difference between their GREEDY ordering and the
  ``ordering'' by Sulzmann and Lu is that GREEDY always prefers a @{text
  Left}-value over a @{text Right}-value, no matter what the underlying
  string is. This seems to be only a very minor difference, but it has
  drastic consequences in terms of what properties both orderings enjoy.
  What is interesting for our purposes is that reflexivity,
  totality and transitivity for this GREEDY ordering can be proved
  relatively easily by induction.
*}
*)

section {* Conclusion *}

text {*

  We have implemented the POSIX value calculation algorithm introduced by
  Sulzmann and Lu
  \cite{Sulzmann2014}. Our implementation is nearly identical to the
  original and all modifications we introduced are harmless (like our char-clause for
  @{text inj}). We have proved this algorithm to be correct, but correct
  according to our own specification of what POSIX values are. Our
  specification (inspired by work of Vansummeren \cite{Vansummeren2006}) appears to be
  much simpler than the one in \cite{Sulzmann2014}, and our proofs are nearly always
  straightforward. We have attempted to formalise the original proof
  by Sulzmann and Lu \cite{Sulzmann2014}, but we believe it contains
  unfillable gaps. In the online version of \cite{Sulzmann2014}, the authors
  already acknowledge some small problems, but our experience suggests
  that there are more serious problems.

  Having proved the correctness of the POSIX lexing algorithm in
  \cite{Sulzmann2014}, what lessons have we learned? Well, this is a
  perfect example of the importance of the \emph{right} definitions. We
  have (on and off) explored mechanisations ever since the first versions
  of \cite{Sulzmann2014} appeared, but made little progress with
  turning the relatively detailed proof sketch in \cite{Sulzmann2014} into a
  formalisable proof. Having seen \cite{Vansummeren2006} and adapted the
  POSIX definition given there to the algorithm by Sulzmann and Lu made all
  the difference: the proofs, as said, are nearly straightforward. The
  question remains whether the original proof idea of \cite{Sulzmann2014},
  potentially using our result as a stepping stone, can be made to work.
  Alas, despite considerable effort, we really do not know.


  Closely related to our work is an automata-based lexer formalised by
  Nipkow \cite{Nipkow98}. This lexer also splits up strings into longest
  initial substrings, but Nipkow's algorithm is not completely
  computational. The algorithm by Sulzmann and Lu, in contrast, can be
  implemented with ease in any functional language. A bespoke lexer for the
  Imp-language is formalised in Coq as part of the Software Foundations book
  by Pierce et al.~\cite{Pierce2015}. The disadvantage of such bespoke lexers
  is that they do not generalise easily to more advanced features.
  Our formalisation is available from the Archive of Formal Proofs \cite{aduAFP16}
  under \url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}.\medskip

  \noindent
  {\bf Acknowledgements:}
  We are very grateful to Martin Sulzmann for his comments on our work and
  moreover for patiently explaining to us the details in \cite{Sulzmann2014}. We
  also received very helpful comments from James Cheney and anonymous referees.
  %  \small
  \bibliographystyle{plain}
  \bibliography{root}

*}

(*<*)
end
(*>*)