author | Christian Urban <christian.urban@kcl.ac.uk> |
Sat, 24 Oct 2020 12:13:39 +0100 | |
changeset 359 | fedc16924b76 |
parent 358 | 06aa99b54423 |
child 360 | e752d84225ec |
Binary file Literature/2019_borsotti_trofimovich_efficient_posix_submatch_extraction_on_nfa.pdf has changed
--- a/Slides/data.sty Wed Sep 18 16:35:57 2019 +0100 +++ b/Slides/data.sty Sat Oct 24 12:13:39 2020 +0100 @@ -1,6 +1,7 @@ % The data files, written on the first run. -\begin{filecontents}{data/re-python.data} +%% example a?{n} a{n} +\begin{filecontents}{re-python.data} 1 0.029 5 0.029 10 0.029 @@ -21,7 +22,27 @@ #29 58.886 \end{filecontents} -\begin{filecontents}{data/re-ruby.data} + +\begin{filecontents}{re-python2.data} +1 0.033 +5 0.036 +10 0.034 +15 0.036 +18 0.059 +19 0.084 +20 0.141 +21 0.248 +22 0.485 +23 0.878 +24 1.71 +25 3.40 +26 7.08 +27 14.12 +28 26.69 +\end{filecontents} + +%% example a?{n} a{n} +\begin{filecontents}{re-ruby.data} 1 0.00006 #2 0.00003 #3 0.00001 @@ -52,8 +73,69 @@ 28 34.79653 \end{filecontents} +% JavaScript, example (a*)*b +\begin{filecontents}{re-js.data} +5 0.061 +10 0.061 +15 0.061 +20 0.070 +23 0.131 +25 0.308 +26 0.564 +28 1.994 +30 7.648 +31 15.881 +32 32.190 +\end{filecontents} -\begin{filecontents}{data/re1.data} +% Java 8, example (a*)*b +\begin{filecontents}{re-java.data} +5 0.00298 +10 0.00418 +15 0.00996 +16 0.01710 +17 0.03492 +18 0.03303 +19 0.05084 +20 0.10177 +21 0.19960 +22 0.41159 +23 0.82234 +24 1.70251 +25 3.36112 +26 6.63998 +27 13.35120 +28 29.81185 +\end{filecontents} + +% Java 9+, example (a*)*b +\begin{filecontents}{re-java9.data} +1000 0.01871 +3000 0.16727 +5000 0.44669 +7000 0.87708 +9000 1.46304 +11000 2.21094 +13000 3.08650 +15000 4.23359 +17000 4.97240 +19000 6.50150 +21000 8.43740 +23000 9.66842 +25000 10.93754 +27000 13.51069 +29000 14.73643 +31000 16.69299 +33000 19.04270 +35000 21.08329 +37000 23.75398 +39000 26.15787 +\end{filecontents} + + + +%% re1.scala: example a?{n} a{n} +\begin{filecontents}{re1.data} 1 0.00179 2 0.00011 3 0.00014 @@ -77,71 +159,291 @@ 21 21.46013 \end{filecontents} -\begin{filecontents}{data/re2a.data} -1 0.00227 -5 0.00027 -10 0.00075 -15 0.00178 -20 0.00102 -25 0.00028 -30 0.00040 -35 0.00052 -40 0.00075 -45 0.00125 -50 0.00112 -55 0.00099 -60 0.00113 -65 0.00137 -70 0.00170 + +%% re1.scala: example (a*)* b +\begin{filecontents}{re1a.data} +1 0.00003 +2 0.00002 +3 0.00004 +4 0.00023 +5 0.00012 +6 0.00016 +7 0.00036 +8 0.00100 +9 0.00158 +10 0.00271 +11 0.00420 +12 0.01034 +13 0.01629 +14 0.03469 +15 0.08800 +16 0.12071 +17 0.27164 +18 0.53962 +19 1.05733 +20 2.34022 +\end{filecontents} + +%% re2.scala example a?{n} a{n} +\begin{filecontents}{re2.data} +1 0.00050 +101 0.02030 +201 0.10587 +301 0.31188 +401 0.32794 +501 0.64490 +601 1.16738 +701 2.10815 +801 3.47144 +901 6.80621 +1001 12.35611 +1101 23.80084 +\end{filecontents} + +%% re2.scala: example (a*)* b +\begin{filecontents}{re2a.data} +1 0.00004 +2 0.00003 +3 0.00004 +4 0.00014 +5 0.00017 +6 0.00029 +7 0.00046 +8 0.00084 +9 0.00137 +10 0.00203 +11 0.00379 +12 0.00783 +13 0.01583 +14 0.04725 +15 0.06672 +16 0.16228 +17 0.25493 +18 0.53676 +19 1.09052 +20 2.56922 +\end{filecontents} + +%% re3.scala: example a?{n} a{n} +\begin{filecontents}{re3.data} +1 0.00003 +1001 0.03887 +2001 0.15666 +3001 0.35910 +4001 0.63950 +5001 1.00241 +6001 1.50480 +7001 2.11568 +8001 2.71208 +9001 3.41157 +10001 4.19962 +11001 5.70387 +\end{filecontents} + +%% re3.scala: example (a*)* b +\begin{filecontents}{re3a.data} +1 0.00003 +500001 0.22527 +1000001 0.62752 +1500001 0.88485 +2000001 1.39815 +2500001 1.68619 +3000001 1.94957 +3500001 2.15878 +4000001 2.59918 +4500001 5.90679 +5000001 13.11295 +5500001 19.15376 +6000001 40.16373 +\end{filecontents} +\begin{filecontents}{re3b.data} +1 0.00015 +500001 0.28337 +1000001 0.53271 +1500001 0.84478 +2000001 1.11763 +2500001 1.76656 +3000001 2.13310 +3500001 2.39576 +4000001 2.98624 +4500001 5.96529 +5000001 13.56911 +5500001 18.43089 +6000001 40.33704 +\end{filecontents} + +%% re4.scala example a?{n} a{n} +\begin{filecontents}{re4.data} +1 0.01399 +500001 1.43645 +1000001 2.59394 +1500001 4.07990 +2000001 5.22473 +2500001 6.41714 +3000001 7.60118 +3500001 9.02056 +4000001 10.50393 +4500001 11.56631 +5000001 13.72020 +5500001 15.09634 +6000001 29.26990 +6500001 33.41039 +7000001 39.06532 \end{filecontents} -\begin{filecontents}{data/re2b.data} -1 0.00020 -51 0.00080 -101 0.00678 -151 0.01792 -201 0.04815 -251 0.09648 -301 0.23195 -351 0.52646 -401 0.96277 -451 1.57726 -501 2.00166 -551 2.98341 -601 4.81181 -651 6.57054 -701 9.73973 -751 14.25762 -801 14.80760 -851 19.60958 -901 25.43550 -951 31.96038 +%% re4.scala example (a*)* b +\begin{filecontents}{re4a.data} +1 0.00015 +500001 2.57302 +1000001 5.58966 +1500001 8.16531 +2000001 10.85055 +2500001 13.42080 +3000001 16.08712 +3500001 18.58433 +4000001 21.23788 +4500001 23.72459 +5000001 27.47479 +5500001 31.85240 +6000001 37.12461 +6500001 39.90294 +7000001 53.50961 +\end{filecontents} + +\begin{filecontents}{nfa.data} +0 0.00099 +5 0.01304 +10 0.05350 +15 0.10152 +20 0.10876 +25 0.06984 +30 0.09693 +35 0.04805 +40 0.07512 +45 0.07624 +50 0.10451 +55 0.13285 +60 0.15748 +65 0.19982 +70 0.24075 +75 0.28963 +80 0.35734 +85 0.43735 +90 0.49692 +95 0.59551 +100 0.72236 +\end{filecontents} + +\begin{filecontents}{nfasearch.data} +0 0.00009 +1 0.00147 +2 0.00030 +3 0.00062 +4 0.00132 +5 0.00177 +6 0.00487 +7 0.00947 +8 0.01757 +9 0.02050 +10 0.02091 +11 0.04002 +12 0.08662 +13 0.17269 +14 0.37255 +15 0.81935 +16 1.76254 +17 3.89442 +18 8.42263 +19 17.89661 +20 38.21481 \end{filecontents} -\begin{filecontents}{data/re3.data} -1 0.001605 -501 0.131066 -1001 0.057885 -1501 0.136875 -2001 0.176238 -2501 0.254363 -3001 0.37262 -3501 0.500946 -4001 0.638384 -4501 0.816605 -5001 1.00491 -5501 1.232505 -6001 1.525672 -6501 1.757502 -7001 2.092784 -7501 2.429224 -8001 2.803037 -8501 3.463045 -9001 3.609 -9501 4.081504 -10001 4.54569 -10501 6.17789 -11001 6.77242 -11501 7.95864 +\begin{filecontents}{compiled.data} +%1 0.234146 +%5000 0.227539 +%10000 0.280748 +50000 1.087897 +100000 3.713165 +250000 21.6624545 +500000 85.872613 +750000 203.6408015 +1000000 345.736574 +\end{filecontents} + +\begin{filecontents}{interpreted.data} +200 1.005863 +400 7.8296765 +500 15.43106 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{interpreted2.data} +0 0 +200 1.005863 +400 7.8296765 +600 27.2321885 +800 65.249271 +1000 135.4493445 +1200 232.134097 +1400 382.527227 +\end{filecontents} + +\begin{filecontents}{compiled2.data} +0 0 +200 0.222058 +400 0.215204 +600 0.202031 +800 0.21986 +1000 0.205934 +1200 0.1981615 +1400 0.207116 \end{filecontents} + +\begin{filecontents}{s-grammar1.data} +1 0.01152 +51 0.07973 +101 0.09726 +151 0.09320 +201 0.10010 +251 0.16997 +301 0.26662 +351 0.46118 +401 0.62516 +451 0.87247 +501 1.16334 +551 1.71152 +601 2.10958 +651 2.44360 +701 2.98488 +751 3.50326 +801 4.11036 +851 4.93394 +901 5.77465 +951 7.39123 +\end{filecontents} + +\begin{filecontents}{s-grammar2.data} +1 0.01280 +2 0.00064 +3 0.00173 +4 0.00355 +5 0.00965 +6 0.02674 +7 0.06953 +8 0.11166 +9 0.18707 +10 0.09189 +11 0.12724 +12 0.24337 +13 0.59304 +14 1.53594 +15 4.01195 +16 10.73582 +17 29.51587 +#18 73.14163 +\end{filecontents}
--- a/Slides/slides.sty Wed Sep 18 16:35:57 2019 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -\usepackage[absolute,overlay]{textpos} -\usepackage{xcolor} -\usepackage{fontspec} -\usepackage[sc]{mathpazo} -\usefonttheme{serif} -\defaultfontfeatures{Ligatures=TeX} -\defaultfontfeatures{Mapping=tex-text} -\setromanfont{Hoefler Text} -\setmonofont[Scale=.88]{Consolas} -\newfontfamily{\consolas}{Consolas} - - -\definecolor{darkblue}{rgb}{0,0,0.6} -\hypersetup{colorlinks=true} -\hypersetup{linkcolor=darkblue} -\hypersetup{urlcolor=darkblue} - - - -\newcommand{\tttext}[1]{{\consolas{#1}}} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% -\newcommand{\slidecaption}{} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Frametitles - -\setbeamerfont{frametitle}{size={\LARGE}} -\setbeamerfont{frametitle}{family={\fontspec{Hoefler Text Black}}} -\setbeamercolor{frametitle}{fg=ProcessBlue,bg=white} - -\setbeamertemplate{frametitle}{% -\vskip 2mm % distance from the top margin -\hskip -3mm % distance from left margin -\vbox{% -\begin{minipage}{1.05\textwidth}% -\centering% -\begin{tabular}{@{}c@{}}% -\insertframetitle% -\end{tabular}% -\end{minipage}\vspace{-10pt}}% -} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Foot -% -\setbeamertemplate{navigation symbols}{} -\usefoottemplate{% -\vbox{% - \tinyline{% - \tiny\hfill\textcolor{gray!50}{\slidecaption{} -- - p.~\insertframenumber/\inserttotalframenumber}}}% -} - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\beamertemplateballitem -\setlength\leftmargini{2mm} -\setlength\leftmarginii{0.6cm} -\setlength\leftmarginiii{1.5cm} -\setbeamertemplate{itemize/enumerate body end}{\vspace{-2mm}} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% blocks -%\definecolor{cream}{rgb}{1,1,.65} -\definecolor{cream}{rgb}{1,1,.8} -\setbeamerfont{block title}{size=\normalsize} -\setbeamercolor{block title}{fg=black,bg=cream} -\setbeamercolor{block body}{fg=black,bg=cream} - -\setbeamertemplate{blocks}[rounded][shadow=true] - -\setbeamercolor{boxcolor}{fg=black,bg=cream} - -\mode -<all> - - - - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Slides/slides2.sty Sat Oct 24 12:13:39 2020 +0100 @@ -0,0 +1,86 @@ +\usepackage[absolute,overlay]{textpos} +\usepackage{xcolor} +\usepackage{fontspec} +\usepackage[sc]{mathpazo} +\usefonttheme{serif} +\defaultfontfeatures{Ligatures=TeX} +\defaultfontfeatures{Mapping=tex-text} +\setromanfont{Hoefler Text} +\setmonofont[Scale=.88]{Consolas} +\newfontfamily{\consolas}{Consolas} + + +\definecolor{darkblue}{rgb}{0,0,0.6} +\hypersetup{colorlinks=true} +\hypersetup{linkcolor=darkblue} +\hypersetup{urlcolor=darkblue} + + + +\newcommand{\tttext}[1]{{\consolas{#1}}} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% +\newcommand{\slidecaption}{} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Frametitles + +\setbeamerfont{frametitle}{size={\LARGE}} +\setbeamerfont{frametitle}{family={\fontspec{Hoefler Text Black}}} +\setbeamercolor{frametitle}{fg=ProcessBlue,bg=white} + +\setbeamertemplate{frametitle}{% +\vskip 2mm % distance from the top margin +\hskip -3mm % distance from left margin +\vbox{% +\begin{minipage}{1.05\textwidth}% +\centering% +\begin{tabular}{@{}c@{}}% +\insertframetitle% +\end{tabular}% +\end{minipage}\vspace{-10pt}}% +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Foot +% +\setbeamertemplate{navigation symbols}{} +\usefoottemplate{% +\vbox{% + \tinyline{% + \tiny\hfill\textcolor{gray!50}{\slidecaption{} -- + p.~\insertframenumber/\inserttotalframenumber}}}% +} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\beamertemplateballitem +\setlength\leftmargini{2mm} +\setlength\leftmarginii{0.6cm} +\setlength\leftmarginiii{1.5cm} +\setbeamertemplate{itemize/enumerate body end}{\vspace{-2mm}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% blocks +%\definecolor{cream}{rgb}{1,1,.65} +\definecolor{cream}{rgb}{1,1,.8} +\setbeamerfont{block title}{size=\normalsize} +\setbeamercolor{block title}{fg=black,bg=cream} +\setbeamercolor{block body}{fg=black,bg=cream} + +\setbeamertemplate{blocks}[rounded][shadow=true] + +\setbeamercolor{boxcolor}{fg=black,bg=cream} + +\mode +<all> + + + + + +
--- a/progs/haskell/re.hs Wed Sep 18 16:35:57 2019 +0100 +++ b/progs/haskell/re.hs Sat Oct 24 12:13:39 2020 +0100 @@ -3,8 +3,9 @@ import Text.Printf import Control.Exception import System.CPUTime -import Control.Parallel.Strategies +--import Control.Parallel.Strategies import Control.Monad +import Control.DeepSeq lim :: Int lim = 1 @@ -15,7 +16,7 @@ start <- getCPUTime replicateM_ lim $ do x <- evaluate $ 1 + y - rdeepseq x `seq` return () + deepseq x `seq` return () end <- getCPUTime let diff = (fromIntegral (end - start)) / (10^12) printf "%0.9f\n" (diff :: Double) @@ -245,7 +246,7 @@ (_, ZERO) -> (ZERO, f_id) (ONE, _) -> (r2s, f_seq_void1 f1d f2s) (_, ONE) -> (r1d, f_seq_void2 f1d f2s) - (_, _) -> (SEQ r1d r2s, f_seq f1d f2s)) + (_, _) -> (SEQ r1d r2s, f_seq f1d f2s)) STAR r1 -> let (r1d, f1d) = der_simp c r1 in
--- a/progs/scala/re-basic.scala Wed Sep 18 16:35:57 2019 +0100 +++ b/progs/scala/re-basic.scala Sat Oct 24 12:13:39 2020 +0100 @@ -92,6 +92,31 @@ case c::s => ders(s, der(c, r)) } + +def closed_seq_ders() + +def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match { + case (Nil, r) => r + case (s, ZERO) => ZERO + case (s, ONE) => if (s == Nil) ONE else ZERO + case (s, CHAR(c)) => if (s == List(c)) ONE else + if (s == Nil) CHAR(c) else ZERO + case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2)) + case (c::s, r) => ders2(s, simp(der(c, r))) +} + +// derivative w.r.t. a string (iterates der) +def cders (s: List[Char], r: Rexp) : Rexp = s match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (s == List(d)) ONE else ZERO + case ALT(r1, r2) => ALT(cders(s, r1), cders(s, r2)) + case SEQ(r1, r2) => closed_seq_ders(r1, r2, s) ??? + case STAR(r) => SEQ(cders(s, r), STAR(r)) ??? + case RECD(_, r1) => cders(s, r1) +} + + // extracts a string from value def flatten(v: Val) : String = v match { case Empty => ""
--- a/progs/scala/re-bit.scala Wed Sep 18 16:35:57 2019 +0100 +++ b/progs/scala/re-bit.scala Sat Oct 24 12:13:39 2020 +0100 @@ -324,6 +324,9 @@ val sulzmann = ("a" | "b" | "ab").% + +println(s"Sulzmann Test: ${alexing(sulzmann, "ab" * 1)}") + println(alexing(sulzmann, "a" * 10)) println(alexing_simp(sulzmann, "a" * 10)) @@ -392,7 +395,7 @@ def tests_inj(ss: Set[String])(r: Rexp) = { clear() println(s"Testing ${r}") - for (s <- ss.par) yield { + for (s <- ss) yield { val res1 = Try(Some(alexing(r, s))).getOrElse(None) val res2 = Try(Some(alexing_simp(r, s))).getOrElse(None) if (res1 != res2) println(s"Disagree on ${r} and ${s}") @@ -412,7 +415,7 @@ def tests_alexer(ss: Set[String])(r: Rexp) = { clear() println(s"Testing ${r}") - for (s <- ss.par) yield { + for (s <- ss) yield { val d = der('b', r) val ad = ader('b', internalise(r)) val res1 = Try(Some(encode(inj(r, 'a', alexing(d, s))))).getOrElse(None)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/scala/re-bit2.scala Sat Oct 24 12:13:39 2020 +0100 @@ -0,0 +1,691 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls +import scala.annotation.tailrec + + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + + + +abstract class Bit +case object Z extends Bit +case object S extends Bit + +type Bits = List[Bit] + +abstract class ARexp +case object AZERO extends ARexp +case class AONE(bs: Bits) extends ARexp +case class ACHAR(bs: Bits, c: Char) extends ARexp +case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp +case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp +case class ASTAR(bs: Bits, r: ARexp) extends ARexp + +def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) + +abstract class Val +case object Empty extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(v: Val) extends Val +case class Right(v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case class Position(i: Int, v: Val) extends Val // for testing purposes +case object Undefined extends Val // for testing purposes + +// some convenience for typing in regular expressions +def charlist2rexp(s: List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s: String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. a string (iterates der) +def ders(s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// mkeps and injection part +def mkeps(r: Rexp) : Val = r match { + case ONE => Empty + case ALT(r1, r2) => + if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case STAR(r) => Stars(Nil) +} + + +def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { + case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) + case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) + case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) + case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) + case (CHAR(d), Empty) => Chr(c) +} + +// main lexing function (produces a value) +// - no simplification +def lex(r: Rexp, s: List[Char]) : Val = s match { + case Nil => if (nullable(r)) mkeps(r) + else throw new Exception("Not matched") + case c::cs => inj(r, c, lex(der(c, r), cs)) +} + +def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) + + + +// Bitcoded + Annotation +//======================= + +//erase function: extracts the regx from Aregex +def erase(r:ARexp): Rexp = r match{ + case AZERO => ZERO + case AONE(_) => ONE + case ACHAR(bs, c) => CHAR(c) + case AALTS(bs, Nil) => ZERO + case AALTS(bs, r::Nil) => erase(r) + case AALTS(bs, r::rs) => ALT(erase(r), erase(AALTS(bs, rs))) + case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2)) + case ASTAR(cs, r)=> STAR(erase(r)) +} + +// translation into ARexps +def fuse(bs: Bits, r: ARexp) : ARexp = r match { + case AZERO => AZERO + case AONE(cs) => AONE(bs ++ cs) + case ACHAR(cs, c) => ACHAR(bs ++ cs, c) + case AALTS(cs, rs) => AALTS(bs ++ cs, rs) + case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) + case ASTAR(cs, r) => ASTAR(bs ++ cs, r) +} + +def internalise(r: Rexp) : ARexp = r match { + case ZERO => AZERO + case ONE => AONE(Nil) + case CHAR(c) => ACHAR(Nil, c) + case ALT(r1, r2) => AALT(Nil, fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))) + case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) + case STAR(r) => ASTAR(Nil, internalise(r)) +} + + +internalise(("a" | "ab") ~ ("b" | "")) + + + +def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { + case (ONE, bs) => (Empty, bs) + case (CHAR(c), bs) => (Chr(c), bs) + case (ALT(r1, r2), Z::bs) => { + val (v, bs1) = decode_aux(r1, bs) + (Left(v), bs1) + } + case (ALT(r1, r2), S::bs) => { + val (v, bs1) = decode_aux(r2, bs) + (Right(v), bs1) + } + case (SEQ(r1, r2), bs) => { + val (v1, bs1) = decode_aux(r1, bs) + val (v2, bs2) = decode_aux(r2, bs1) + (Sequ(v1, v2), bs2) + } + case (STAR(r1), Z::bs) => { + val (v, bs1) = decode_aux(r1, bs) + val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) + (Stars(v::vs), bs2) + } + case (STAR(_), S::bs) => (Stars(Nil), bs) +} + +def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match { + case (v, Nil) => v + case _ => throw new Exception("Not decodable") +} + +def encode(v: Val) : Bits = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => Z :: encode(v) + case Right(v) => S :: encode(v) + case Sequ(v1, v2) => encode(v1) ::: encode(v2) + case Stars(Nil) => List(S) + case Stars(v::vs) => Z :: encode(v) ::: encode(Stars(vs)) +} + + +// nullable function: tests whether the aregular +// expression can recognise the empty string +def bnullable (r: ARexp) : Boolean = r match { + case AZERO => false + case AONE(_) => true + case ACHAR(_,_) => false + case AALTS(_, rs) => rs.exists(bnullable) + case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2) + case ASTAR(_, _) => true +} + +def bmkeps(r: ARexp) : Bits = r match { + case AONE(bs) => bs + case AALTS(bs, r::Nil) => bs ++ bmkeps(r) + case AALTS(bs, r::rs) => + if (bnullable(r)) bs ++ bmkeps(r) else bmkeps(AALTS(bs, rs)) + case ASEQ(bs, r1, r2) => bs ++ bmkeps(r1) ++ bmkeps(r2) + case ASTAR(bs, r) => bs ++ List(S) +} + +// derivative of a regular expression w.r.t. a character +def bder(c: Char, r: ARexp) : ARexp = r match { + case AZERO => AZERO + case AONE(_) => AZERO + case ACHAR(bs, d) => if (c == d) AONE(bs) else AZERO + case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _))) + case ASEQ(bs, r1, r2) => + if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2))) + else ASEQ(bs, bder(c, r1), r2) + case ASTAR(bs, r) => ASEQ(bs, fuse(List(Z), bder(c, r)), ASTAR(Nil, r)) +} + +// derivative w.r.t. a string (iterates der) +@tailrec +def bders (s: List[Char], r: ARexp) : ARexp = s match { + case Nil => r + case c::s => bders(s, bder(c, r)) +} + +// main unsimplified lexing function (produces a value) +def blex(r: ARexp, s: List[Char]) : Bits = s match { + case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched") + case c::cs => blex(bder(c, r), cs) +} + +def pre_blexing(r: ARexp, s: String) : Bits = blex(r, s.toList) +def blexing(r: Rexp, s: String) : Val = decode(r, pre_blexing(internalise(r), s)) + + +// example by Tudor +val reg = ((("a".%)) ~ ("b" | "c")).% + +println(blexing(reg, "aab")) + + +//======================= +// simplification +// + + +def flts(rs: List[ARexp]) : List[ARexp] = rs match { + case Nil => Nil + case AZERO :: rs => flts(rs) + case AALTS(bs, rs1) :: rs => rs1.map(fuse(bs, _)) ++ flts(rs) + case r1 :: rs => r1 :: flts(rs) +} + +def distinctBy[B, C](xs: List[B], + f: B => C, + acc: List[C] = Nil): List[B] = xs match { + case Nil => Nil + case x::xs => { + val res = f(x) + if (acc.contains(res)) distinctBy(xs, f, acc) + else x::distinctBy(xs, f, res::acc) + } +} + + +def bsimp(r: ARexp): ARexp = r match { + case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { + case (AZERO, _) => AZERO + case (_, AZERO) => AZERO + case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) + //case (AALTS(bs, rs), r2) => AALTS(bs, rs.map(ASEQ(Nil, _, r2))) + case (r1s, r2s) => ASEQ(bs1, r1s, r2s) + } + case AALTS(bs1, rs) => distinctBy(flts(rs.map(bsimp)), erase) match { + case Nil => AZERO + case r::Nil => fuse(bs1, r) + case rs => AALTS(bs1, rs) + } + case r => r +} + +def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { + case Nil => if (bnullable(r)) bmkeps(r) + else throw new Exception("Not matched") + case c::cs => blex(bsimp(bder(c, r)), cs) +} + +def blexing_simp(r: Rexp, s: String) : Val = + decode(r, blex_simp(internalise(r), s.toList)) + +println(blexing_simp(reg, "aab")) + +// bsimp2 by Chengsong + +def pos_i(rs: List[ARexp], v: Val): Int = (rs, v) match { + case (r::Nil, v1) => 0 + case ( r::rs1, Right(v)) => pos_i(rs1, v) + 1 + case ( r::rs1, Left(v) ) => 0 +} + +def pos_v(rs: List[ARexp], v: Val): Val = (rs, v) match { + case (r::Nil, v1) => v1 + case (r::rs1, Right(v)) => pos_v(rs1, v) + case (r::rs1, Left(v) ) => v +} + +def unify(rs: List[ARexp], v: Val): Val = { + Position(pos_i(rs, v), pos_v(rs, v)) +} + +// coat does the job of "coating" a value +// given the number of right outside it +def coat(v: Val, i: Int) : Val = i match { + case 0 => v + case i => if (i > 0) coat(Right(v), i - 1) else { println(v, i); throw new Exception("coat minus")} +} + +def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match { + case Nil => (res, v) + case (x::xs) => { + val re = f(x) + if (acc.contains(re)) v match { + case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res) + case _ => throw new Exception("dB2") + } + else distinctBy2(v, xs, f, re::acc, x::res) + } + } + + +def flats(rs: List[ARexp]): List[ARexp] = rs match { + case Nil => Nil + case AZERO :: rs1 => flats(rs1) + case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2) + case r1 :: rs2 => r1 :: flats(rs2) + } + + + +def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match { + case Position(j, v0) => { + if (i < 0) (front ::: flats(rs), Position(j, v0) ) + else if(i == 0){ + rs match { + case AALTS(bs, rs1) :: rs2 => ( (front ::: rs1.map(fuse(bs, _))):::flats(rs2), Position(j + rs1.length - 1, pos_v(rs1, v0))) + case r::rs2 => (front ::: List(r) ::: flats(rs2), Position(j, v0)) + case _ => throw new Exception("flats2 i = 0") + } + } + else{ + rs match { + case AZERO::rs1 => flats2(front, i - 1, rs1, Position(j - 1, v0)) + case AALTS(bs, rs1) ::rs2 => flats2(front:::rs1.map(fuse(bs, _)), i - 1, rs2, Position(j + rs1.length - 1, v0)) + case r::rs1 => flats2(front:::List(r), i - 1, rs1, Position(j, v0)) + case _ => throw new Exception("flats2 i>0") + } + } + } + case _ => throw new Exception("flats2 error") + } + +def deunify(rs_length: Int, v: Val): Val = v match{ + case Position(i, v0) => { + if (i <0 ) { println(rs_length, v); throw new Exception("deunify minus") } + else if (rs_length == 1) {println(v); v} + else if (rs_length - 1 == i) coat(v0, i) + else coat(Left(v0), i) + } + case _ => throw new Exception("deunify error") +} + + +def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r, v) match { + case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { + case ((AZERO, _), (_, _)) => (AZERO, Undefined) + case ((_, _), (AZERO, _)) => (AZERO, Undefined) + case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s) + // v2 tells how to retrieve bits in r2s, which is enough as the bits + // of the first part of the sequence has already been integrated to + // the top level of the second regx. + case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) + } + case (AALTS(bs1, rs), v) => { + val vlist = unify(rs, v) + vlist match { + case Position(i, v0) => { + val v_simp = bsimp2(rs(i), v0)._2 + val rs_simp = rs.map(bsimp) + val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) ) + val dist_res = distinctBy2(flat_res._2, flat_res._1, erase) + val rs_new = dist_res._1 + val v_new = deunify(rs_new.length, dist_res._2) + rs_new match { + case Nil => (AZERO, Undefined) + case s :: Nil => (fuse(bs1, s), v_new) + case rs => (AALTS(bs1, rs), v_new) + } + } + case _ => throw new Exception("Funny vlist bsimp2") + } + } + // STAR case + // case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) + case (r, v) => (r, v) + } + + + +val dr = ASEQ(List(),AALTS(List(S),List(AONE(List(Z)), AONE(List(S)))),ASTAR(List(),AALTS(List(),List(ACHAR(List(Z),'a'), ACHAR(List(S),'a'))))) +val dv = Sequ(Left(Empty),Stars(List())) +println(bsimp2(dr, dv)) + + +/* +def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match { + case Nil => if (bnullable(r)) bmkeps(r) + else throw new Exception("Not matched") + case c::cs => blex(bsimp2(bder(c, r)), cs) +} + +def blexing_simp2(r: Rexp, s: String) : Val = + decode(r, blex_simp2(internalise(r), s.toList)) + +println(blexing_simp2(reg, "aab")) +*/ + +// extracts a string from value +def flatten(v: Val) : String = v match { + case Empty => "" + case Chr(c) => c.toString + case Left(v) => flatten(v) + case Right(v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Stars(vs) => vs.map(flatten).mkString +} + +// extracts an environment from a value +def env(v: Val) : List[(String, String)] = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => env(v) + case Right(v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) +} + +// Some Tests +//============ + +/* +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +val evil1 = (("a").%) ~ "b" +val evil2 = (((("a").%).%).%) ~ "b" +val evil3 = (("a"~"a") | ("a")).% + +for(i <- 1 to 10000 by 1000) { + println(time_needed(1, blex_simp(internalise(evil1), ("a"*i + "b").toList))) +} + +for(i <- 1 to 10000 by 1000) { + println(time_needed(1, blexing_simp(evil1, "a"*i + "b"))) +} + +for(i <- 1 to 10000 by 1000) { + println(time_needed(1, blexing_simp(evil2, "a"*i + "b"))) +} + +for(i <- 1 to 10000 by 1000) { + println(time_needed(1, blexing_simp(evil3, "a"*i))) +} +*/ + + + + + +///////////////////////// +///////////////////////// +///// Below not relevant + +/* +val rf = ("a" | "ab") ~ ("ab" | "") +println(pre_blexing(internalise(rf), "ab")) +println(blexing(rf, "ab")) +println(blexing_simp(rf, "ab")) + +val r0 = ("a" | "ab") ~ ("b" | "") +println(pre_blexing(internalise(r0), "ab")) +println(blexing(r0, "ab")) +println(blexing_simp(r0, "ab")) + +val r1 = ("a" | "ab") ~ ("bcd" | "cd") +println(blexing(r1, "abcd")) +println(blexing_simp(r1, "abcd")) + +println(blexing((("" | "a") ~ ("ab" | "b")), "ab")) +println(blexing_simp((("" | "a") ~ ("ab" | "b")), "ab")) + +println(blexing((("" | "a") ~ ("b" | "ab")), "ab")) +println(blexing_simp((("" | "a") ~ ("b" | "ab")), "ab")) + +println(blexing((("" | "a") ~ ("c" | "ab")), "ab")) +println(blexing_simp((("" | "a") ~ ("c" | "ab")), "ab")) + + +// Sulzmann's tests +//================== + +val sulzmann = ("a" | "b" | "ab").% + +println(blexing(sulzmann, "a" * 10)) +println(blexing_simp(sulzmann, "a" * 10)) + +for (i <- 0 to 4000 by 500) { + println(i + ": " + "%.5f".format(time_needed(1, blexing_simp(sulzmann, "a" * i)))) +} + +for (i <- 0 to 15 by 5) { + println(i + ": " + "%.5f".format(time_needed(1, blexing_simp(sulzmann, "ab" * i)))) +} + +*/ + + +// some automatic testing + +/* +def clear() = { + print("") + //print("\33[H\33[2J") +} + +def merge[A](l1: LazyList[A], l2: LazyList[A], l3: LazyList[A]) : LazyList[A] = + l1.head #:: l2.head #:: l3.head #:: merge(l1.tail, l2.tail, l3.tail) + + +// enumerates regular expressions until a certain depth +def enum(rs: LazyList[Rexp]) : LazyList[Rexp] = { + rs #::: enum( (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ + (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ + (for (r1 <- rs) yield STAR(r1))) +} + + +enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(200).force.mkString("\n") +enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(200_000).force + + + +import scala.util.Try + +def test_mkeps(r: Rexp) = { + val res1 = Try(Some(mkeps(r))).getOrElse(None) + val res2 = Try(Some(decode(r, bmkeps(internalise(r))))).getOrElse(None) + if (res1 != res2) println(s"Mkeps disagrees on ${r}") + if (res1 != res2) Some(r) else (None) +} + +println("Testing mkeps") +enum(LazyList(ZERO, ONE, CHAR('a'), CHAR('b'))).take(100).exists(test_mkeps(_).isDefined) +//enum(3, "ab").map(test_mkeps).toSet +//enum(3, "abc").map(test_mkeps).toSet + + +//enumerates strings of length n over alphabet cs +def strs(n: Int, cs: String) : Set[String] = { + if (n == 0) Set("") + else { + val ss = strs(n - 1, cs) + ss ++ + (for (s <- ss; c <- cs.toList) yield c + s) + } +} + +//tests lexing and lexingB +def tests_inj(ss: Set[String])(r: Rexp) = { + clear() + println(s"Testing ${r}") + for (s <- ss.par) yield { + val res1 = Try(Some(alexing(r, s))).getOrElse(None) + val res2 = Try(Some(alexing_simp(r, s))).getOrElse(None) + if (res1 != res2) println(s"Disagree on ${r} and ${s}") + if (res1 != res2) println(s" ${res1} != ${res2}") + if (res1 != res2) Some((r, s)) else None + } +} + +//println("Testing lexing 1") +//enum(2, "ab").map(tests_inj(strs(2, "ab"))).toSet +//println("Testing lexing 2") +//enum(2, "ab").map(tests_inj(strs(3, "abc"))).toSet +//println("Testing lexing 3") +//enum(3, "ab").map(tests_inj(strs(3, "abc"))).toSet + + +def tests_alexer(ss: Set[String])(r: Rexp) = { + clear() + println(s"Testing ${r}") + for (s <- ss.par) yield { + val d = der('b', r) + val ad = bder('b', internalise(r)) + val res1 = Try(Some(encode(inj(r, 'a', alexing(d, s))))).getOrElse(None) + val res2 = Try(Some(pre_alexing(ad, s))).getOrElse(None) + if (res1 != res2) println(s"Disagree on ${r} and 'a'::${s}") + if (res1 != res2) println(s" ${res1} != ${res2}") + if (res1 != res2) Some((r, s)) else None + } +} + +println("Testing alexing 1") +println(enum(2, "ab").map(tests_alexer(strs(2, "ab"))).toSet) + + +def values(r: Rexp) : Set[Val] = r match { + case ZERO => Set() + case ONE => Set(Empty) + case CHAR(c) => Set(Chr(c)) + case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++ + (for (v2 <- values(r2)) yield Right(v2)) + case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2) + case STAR(r) => (Set(Stars(Nil)) ++ + (for (v <- values(r)) yield Stars(List(v)))) + // to do more would cause the set to be infinite +} + +def tests_bder(c: Char)(r: Rexp) = { + val d = der(c, r) + val vals = values(d) + for (v <- vals) { + println(s"Testing ${r} and ${v}") + val res1 = retrieve(bder(c, internalise(r)), v) + val res2 = encode(inj(r, c, decode(d, retrieve(internalise(der(c, r)), v)))) + if (res1 != res2) println(s"Disagree on ${r}, ${v} and der = ${d}") + if (res1 != res2) println(s" ${res1} != ${res2}") + if (res1 != res2) Some((r, v)) else None + } +} + +println("Testing bder/der") +println(enum(2, "ab").map(tests_bder('a')).toSet) + +val er = SEQ(ONE,CHAR('a')) +val ev = Right(Empty) +val ed = ALT(SEQ(ZERO,CHAR('a')),ONE) + +retrieve(internalise(ed), ev) // => [true] +internalise(er) +bder('a', internalise(er)) +retrieve(bder('a', internalise(er)), ev) // => [] +decode(ed, List(true)) // gives the value for derivative +decode(er, List()) // gives the value for original value + + +val dr = STAR(CHAR('a')) +val dr_der = SEQ(ONE,STAR(CHAR('a'))) // derivative of dr +val dr_val = Sequ(Empty,Stars(List())) // value of dr_def + + +val res1 = retrieve(internalise(der('a', dr)), dr_val) // => [true] +val res2 = retrieve(bder('a', internalise(dr)), dr_val) // => [false, true] +decode(dr_der, res1) // gives the value for derivative +decode(dr, res2) // gives the value for original value + +encode(inj(dr, 'a', decode(dr_der, res1))) + +*/
--- a/progs/sml/re.ML Wed Sep 18 16:35:57 2019 +0100 +++ b/progs/sml/re.ML Sat Oct 24 12:13:39 2020 +0100 @@ -132,9 +132,7 @@ | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) | (CHAR(d), Empty) => Chr(d) | (RECD(x, r1), _) => Rec(x, inj r1 c v) - | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); - print ("v: " ^ PolyML.makestring v ^ "\n"); - raise Error) + | _ => raise Error (* some "rectification" functions for simplification *) fun f_id v = v @@ -338,8 +336,8 @@ val prog = "ab"; val reg = ("x" $ ((str "a") -- (str "b"))); -print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n"); -print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n"); +print("Simp: " ^ (lexing_simp reg prog) ^ "\n"); +print("Acc: " ^ (lexing_acc reg prog) ^ "\n"); print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n"); fun fst (x, y) = x;
--- a/text/langs.sty Wed Sep 18 16:35:57 2019 +0100 +++ b/text/langs.sty Sat Oct 24 12:13:39 2020 +0100 @@ -59,7 +59,7 @@ stringstyle=\color{codegreen}, commentstyle=\color{codegreen}, morecomment=[s][\color{codedocblue}]{/**}{*/}, - numbers=none, + numbers={}, numberstyle=\tiny\color{black}, stepnumber=1, numbersep=10pt,
--- a/text/proposal.tex Wed Sep 18 16:35:57 2019 +0100 +++ b/text/proposal.tex Sat Oct 24 12:13:39 2020 +0100 @@ -1,9 +1,10 @@ +% !TEX program = pfdlatex \documentclass{article} \usepackage{url} \usepackage{color}\definecolor{darkblue}{rgb}{0,0,0.5} \usepackage[a4paper,colorlinks=true,linkcolor=darkblue,citecolor=darkblue,filecolor=darkblue,pagecolor=darkblue,urlcolor=darkblue]{hyperref} \usepackage{style} -\usepackage{langs} +%\usepackage{langs} \usepackage{tikz} \usepackage{pgf} \usepackage{pgfplots} @@ -11,12 +12,12 @@ \addtolength{\oddsidemargin}{-6mm} \addtolength{\evensidemargin}{-6mm} -\addtolength{\textwidth}{12mm} +\addtolength{\textwidth}{12mm} %% \usepackage{accents} -\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} +\newcommand\barbelow[1]{\stackunbder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} \begin{filecontents}{re-python2.data} 1 0.033 @@ -88,7 +89,7 @@ \section*{PPProposal: \\ Fast Regular Expression Matching\\ - with Brzozowski Derivatives} + with Brzozowski bderivatives} \subsubsection*{Summary} @@ -102,19 +103,19 @@ network intrusion prevention systems that use regular expressions for traffic analysis are Snort and Bro. -Given the large -volume of Internet traffic even the smallest servers can handle -nowadays, the regular expressions for traffic analysis have become a -real bottleneck. This is not just a nuisance, but actually a security -vulnerability in itself: it can lead to denial-of-service attacks. -The proposed project aims to remove this bottleneck by using a little-known -technique of building derivatives of regular expressions. These -derivatives have not yet been used in the area of network traffic -analysis, but have the potential to solve some tenacious problems with -existing approaches. The project will require theoretical work, but -also a practical implementation (a proof-of-concept at least). The -work will build on earlier work by Ausaf and Urban from King's College -London \cite{AusafDyckhoffUrban2016}. +Given the large volume of Internet traffic even the smallest servers +can handle nowadays, the regular expressions for traffic analysis have +become a real bottleneck. This is not just a nuisance, but actually a +security vulnerability in itself: it can lead to denial-of-service +attacks. The proposed project aims to remove this bottleneck by using +a little-known technique of building bderivatives of regular +expressions. These derivatives have not yet been used in the area of +network traffic analysis, but have the potential to solve some +tenacious problems with existing approaches. The project will require +theoretical work, but also a practical implementation (a +proof-of-concept at least). The work will build on earlier work by +Ausaf and Urban from King's College London +\cite{AusafDyckhoffUrban2016}. \subsubsection*{Background} @@ -168,10 +169,10 @@ mistakes that can be abused to gain access to a computer. This means server administrators have a suite of sometimes thousands of regular expressions, all prescribing some suspicious pattern for when a -computer is under attack. +computer is unbder attack. -The underlying algorithmic problem is to decide whether a string in +The unbderlying algorithmic problem is to decide whether a string in the logs matches the patterns determined by the regular expressions. Such decisions need to be done as fast as possible, otherwise the scanning programs would not be useful as a hardening @@ -207,7 +208,7 @@ \begin{center} \begin{tabular}{@{}cc@{}} %\multicolumn{2}{c}{Graph: $(a^*)^*\cdot b$ and strings -% $\underbrace{a\ldots a}_{n}$}\smallskip\\ +% $\unbderbrace{a\ldots a}_{n}$}\smallskip\\ \begin{tikzpicture} \begin{axis}[ xlabel={$n$}, @@ -259,7 +260,7 @@ matchers in the popular programming languages Python and Java (Version 8 and Version 9). There are many more regular expressions that show the same behaviour. On the left-hand side the graphs show that for a -string consisting of just 28 $a\,$'s, Python and the older Java (which +string consisting of just 28 $a\,$'s, Python and the olbder Java (which was the latest version of Java until September 2017) already need 30 seconds to decide whether this string is matched or not. This is an abysmal result given that Python and Java are very popular programming @@ -272,7 +273,7 @@ $a\,$'s in 30 seconds---however this would still not be good enough for being a useful tool for network traffic analysis. What is interesting is that even a relatively simple-minded regular expression -matcher based on Brzozowski derivatives can out-compete the regular +matcher based on Brzozowski bderivatives can out-compete the regular expressions matchers in Java and Python on such catastrophic backtracking examples. This gain in speed is the motivation and starting point for the proposed project. \smallskip @@ -302,21 +303,21 @@ \noindent For practical purposes, however, regular expressions have been -extended in various ways in order to make them even more powerful and +extended in various ways in orbder to make them even more powerful and even more useful for applications. Some of the problems to do with catastrophic backtracking stem, unfortunately, from these extensions. The crux in this project is to not use automata for regular expression -matching (the traditional technique), but to use derivatives of -regular expressions instead. These derivatives were introduced by +matching (the traditional technique), but to use bderivatives of +regular expressions instead. These bderivatives were introduced by Brzozowski in 1964 \cite{Brzozowski1964}, but somehow had been lost ``in the sands of time'' \cite{Owens2009}. However, they recently have become again a ``hot'' research topic with numerous research papers appearing in the last five years. One reason for this interest is that -Brzozowski derivatives straightforwardly extend to more general +Brzozowski bderivatives straightforwardly extend to more general regular expression constructors, which cannot be easily treated with standard techniques. They can also be implemented with ease in any -functional programming language: the definition for derivatives +functional programming language: the definition for bderivatives consists of just two simple recursive functions shown in Figure~\ref{f}. Moreover, it can be easily shown (by a mathematical proof) that the resulting regular matcher is in fact always producing @@ -336,17 +337,17 @@ $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\ $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\medskip\\ -$\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\ -$\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ -$\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ -$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\ -$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ - & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\ - & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\ -$\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\[-5mm] +$\textit{bder}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\ +$\textit{bder}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ +$\textit{bder}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ +$\textit{bder}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{bder}\;c\;r_1) + (\textit{bder}\;c\;r_2)$\\ +$\textit{bder}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ + & & $\textit{then}\;((\textit{bder}\;c\;r_1)\cdot r_2) + (\textit{bder}\;c\;r_2)$\\ + & & $\textit{else}\;(\textit{bder}\;c\;r_1)\cdot r_2$\\ +$\textit{bder}\;c\;(r^*)$ & $\dn$ & $(\textit{bder}\;c\;r)\cdot (r^*)$\\[-5mm] \end{tabular} \end{center} -\caption{The complete definition of derivatives of regular expressions +\caption{The complete definition of bderivatives of regular expressions \cite{Brzozowski1964}. This definition can be implemented with ease in functional programming languages and can be easily extended to regular expressions used in practice. The are more flexible for applications and easier to manipulate in @@ -354,17 +355,17 @@ matching.\medskip\label{f}} \end{figure} -There are a number of avenues for research on Brzozowski derivatives. +There are a number of avenues for research on Brzozowski bderivatives. One problem I like to immediately tackle in this project is how to handle \emph{back-references} in regular expressions. It is known that -the inclusion of back-references causes the underlying algorithmic +the inclusion of back-references causes the unbderlying algorithmic problem to become NP-hard \cite{Aho}, and is the main reason why regular expression matchers suffer from the catastrophic backtracking problem. However, the full generality of back-references are not needed in practical applications. The goal therefore is to sufficiently restrict the problem so that an efficient algorithm can be designed. There seem to be good indications that Brzozowski -derivatives are useful for that. +bderivatives are useful for that. Another problem is \emph{how} regular expressions match a string \cite{Sulzmann2014}. In this case the algorithm does not just give a @@ -390,8 +391,8 @@ from the truth. In fact regular expressions have become a ``hot'' research topic in recent years because of problems with the traditional methods (in applications such as network traffic -analysis), but also because the technique of derivatives of regular -expression has been re-discovered. These derivatives provide an +analysis), but also because the technique of bderivatives of regular +expression has been re-discovered. These bderivatives provide an alternative approach to regular expression matching. Their potential lies in the fact that they are more flexible in implementations and much easier to manipulate in mathematical proofs. Therefore I like to @@ -420,6 +421,24 @@ \renewcommand{\refname}{References\\[-7mm]\mbox{}} \bibliography{proposal} + +\newpage +\begin{center}\small +\begin{tabular}{lcl} +$\textit{bder}\;c\;_{[]}(\ZERO)$ & $\dn$ & $_{[]}\ZERO$\\ +$\textit{bder}\;c\;_{bs}(\ONE)$ & $\dn$ & $_{[]}\ZERO$\\ +$\textit{bder}\;c\;_{bs}(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;_{bs}\ONE \; \textit{else} \;_{[]}\ZERO$\\ +$\textit{bder}\;c\;_{bs}(r_1 + r_2)$ & $\dn$ & $_{bs}(\textit{bder}\;c\;r_1 + \textit{bder}\;c\;r_2)$\\ +$\textit{bder}\;c\;_{bs}(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ + & & $\textit{then}\;_{bs}(_{[]}(\textit{bder}\;c\;r_1)\cdot r_2) + (_{\textit{bmkeps}\,r_1\rightarrow}\textit{bder}\;c\;r_2)$\\ + & & $\textit{else}\;_{bs}(\textit{bder}\;c\;r_1)\cdot r_2$\\ +$\textit{bder}\;c\;_{bs}(r^*)$ & $\dn$ & $_{bs}(_{0\rightarrow}\textit{bder}\;c\;r)\cdot (r^*)$\\[-5mm] +\end{tabular} +\end{center} + + + + \end{document}
--- a/thys/BitCoded2.thy Wed Sep 18 16:35:57 2019 +0100 +++ b/thys/BitCoded2.thy Sat Oct 24 12:13:39 2020 +0100 @@ -170,7 +170,7 @@ | "intern (ALT r1 r2) = AALT [] (fuse [Z] (intern r1)) (fuse [S] (intern r2))" | "intern (SEQ r1 r2) = ASEQ [] (intern r1) (intern r2)" -| "intern (STAR r) = ASTAR [] (intern r)" +| "intern (STAR r) = ASTAR [S] (intern r)" fun retrieve :: "arexp \<Rightarrow> val \<Rightarrow> bit list" where @@ -203,7 +203,7 @@ | "bmkeps(ASEQ bs r1 r2) = bs @ (bmkeps r1) @ (bmkeps r2)" | "bmkeps(AALTs bs [r]) = bs @ (bmkeps r)" | "bmkeps(AALTs bs (r#rs)) = (if bnullable(r) then bs @ (bmkeps r) else (bmkeps (AALTs bs rs)))" -| "bmkeps(ASTAR bs r) = bs @ [S]" +| "bmkeps(ASTAR bs r) = bs" fun @@ -217,7 +217,15 @@ (if bnullable r1 then AALT bs (ASEQ [] (bder c r1) r2) (fuse (bmkeps r1) (bder c r2)) else ASEQ bs (bder c r1) r2)" -| "bder c (ASTAR bs r) = ASEQ bs (fuse [Z] (bder c r)) (ASTAR [] r)" +| "bder c (ASTAR bs r) = ASEQ (butlast bs) (fuse [Z] (bder c r)) (ASTAR [S] r)" + + + +lemma bder_fuse: + "fuse bs (bder c r) = bder c (fuse bs r)" + apply(induct r arbitrary: bs) + apply(simp_all) + done fun @@ -301,14 +309,6 @@ by (simp_all add: retrieve_fuse2) -lemma retrieve_code: - assumes "\<Turnstile> v : r" - shows "code v = retrieve (intern r) v" - using assms - apply(induct v r ) - apply(simp_all add: retrieve_fuse retrieve_encode_STARS) - done - lemma r: assumes "bnullable (AALTs bs (a # rs))" shows "bnullable a \<or> (\<not> bnullable a \<and> bnullable (AALTs bs rs))" @@ -410,148 +410,6 @@ apply(auto) using r3 by blast -lemma bmkeps_retrieve: - assumes "nullable (erase r)" - shows "bmkeps r = retrieve r (mkeps (erase r))" - using assms - apply(induct r) - apply(simp) - apply(simp) - apply(simp) - apply(simp) - defer - apply(simp) - apply(rule t) - apply(auto) - done - -lemma bder_retrieve: - assumes "\<Turnstile> v : der c (erase r)" - shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)" - using assms - apply(induct r arbitrary: v rule: erase.induct) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(case_tac "c = ca") - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(case_tac rs) - apply(simp) - apply(simp) - apply (smt Prf_elims(3) injval.simps(2) injval.simps(3) retrieve.simps(4) retrieve.simps(5) same_append_eq) - apply(simp) - apply(case_tac "nullable (erase r1)") - apply(simp) - apply(erule Prf_elims) - apply(subgoal_tac "bnullable r1") - prefer 2 - using bnullable_correctness apply blast - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(subgoal_tac "bnullable r1") - prefer 2 - using bnullable_correctness apply blast - apply(simp) - apply(simp add: retrieve_fuse2) - apply(simp add: bmkeps_retrieve) - apply(simp) - apply(erule Prf_elims) - apply(simp) - using bnullable_correctness apply blast - apply(rename_tac bs r v) - apply(simp) - apply(erule Prf_elims) - apply(clarify) - apply(erule Prf_elims) - apply(clarify) - apply(subst injval.simps) - apply(simp del: retrieve.simps) - apply(subst retrieve.simps) - apply(subst retrieve.simps) - apply(simp) - apply(simp add: retrieve_fuse2) - done - - - -lemma MAIN_decode: - assumes "\<Turnstile> v : ders s r" - shows "Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" - using assms -proof (induct s arbitrary: v rule: rev_induct) - case Nil - have "\<Turnstile> v : ders [] r" by fact - then have "\<Turnstile> v : r" by simp - then have "Some v = decode (retrieve (intern r) v) r" - using decode_code retrieve_code by auto - then show "Some (flex r id [] v) = decode (retrieve (bders (intern r) []) v) r" - by simp -next - case (snoc c s v) - have IH: "\<And>v. \<Turnstile> v : ders s r \<Longrightarrow> - Some (flex r id s v) = decode (retrieve (bders (intern r) s) v) r" by fact - have asm: "\<Turnstile> v : ders (s @ [c]) r" by fact - then have asm2: "\<Turnstile> injval (ders s r) c v : ders s r" - by (simp add: Prf_injval ders_append) - have "Some (flex r id (s @ [c]) v) = Some (flex r id s (injval (ders s r) c v))" - by (simp add: flex_append) - also have "... = decode (retrieve (bders (intern r) s) (injval (ders s r) c v)) r" - using asm2 IH by simp - also have "... = decode (retrieve (bder c (bders (intern r) s)) v) r" - using asm by (simp_all add: bder_retrieve ders_append) - finally show "Some (flex r id (s @ [c]) v) = - decode (retrieve (bders (intern r) (s @ [c])) v) r" by (simp add: bders_append) -qed - - -definition blex where - "blex a s \<equiv> if bnullable (bders a s) then Some (bmkeps (bders a s)) else None" - - - -definition blexer where - "blexer r s \<equiv> if bnullable (bders (intern r) s) then - decode (bmkeps (bders (intern r) s)) r else None" - -lemma blexer_correctness: - shows "blexer r s = lexer r s" -proof - - { define bds where "bds \<equiv> bders (intern r) s" - define ds where "ds \<equiv> ders s r" - assume asm: "nullable ds" - have era: "erase bds = ds" - unfolding ds_def bds_def by simp - have mke: "\<Turnstile> mkeps ds : ds" - using asm by (simp add: mkeps_nullable) - have "decode (bmkeps bds) r = decode (retrieve bds (mkeps ds)) r" - using bmkeps_retrieve - using asm era by (simp add: bmkeps_retrieve) - also have "... = Some (flex r id s (mkeps ds))" - using mke by (simp_all add: MAIN_decode ds_def bds_def) - finally have "decode (bmkeps bds) r = Some (flex r id s (mkeps ds))" - unfolding bds_def ds_def . - } - then show "blexer r s = lexer r s" - unfolding blexer_def lexer_flex - apply(subst bnullable_correctness[symmetric]) - apply(simp) - done -qed lemma asize0: shows "0 < asize r" @@ -565,24 +423,27 @@ apply(auto) done -lemma bder_fuse: - shows "bder c (fuse bs a) = fuse bs (bder c a)" - apply(induct a arbitrary: bs c) - apply(simp_all) - done - -lemma bders_fuse: - shows "bders (fuse bs a) s = fuse bs (bders a s)" - apply(induct s arbitrary: bs a) - apply(simp_all) - by (simp add: bder_fuse) - - -lemma map_bder_fuse: - shows "map (bder c \<circ> fuse bs1) as1 = map (fuse bs1) (map (bder c) as1)" - apply(induct as1) - apply(auto simp add: bder_fuse) - done +lemma TESTTEST: + shows "bder c (intern r) = intern (der c r)" + apply(induct r) + apply(simp) + apply(simp) + apply(simp) + prefer 2 + apply(simp) + apply (simp add: bder_fuse[symmetric]) + prefer 3 + apply(simp only: intern.simps) + apply(simp only: der.simps) + apply(simp only: intern.simps) + apply(simp only: bder.simps) + apply(simp) + apply(simp only: intern.simps) + prefer 2 + apply(simp) + prefer 2 + apply(simp) + apply(auto) fun nonnested :: "arexp \<Rightarrow> bool" @@ -671,6 +532,8 @@ | "ASTAR bs r >> bs @ [S]" | "\<lbrakk>r >> bs1; ASTAR [] r >> bs2\<rbrakk> \<Longrightarrow> ASTAR bs r >> bs @ Z # bs1 @ bs2" + + lemma contains0: assumes "a >> bs" shows "(fuse bs1 a) >> bs1 @ bs" @@ -899,6 +762,8 @@ using Prf_flex assms contains6 contains7b by blast + + fun bders_simp :: "arexp \<Rightarrow> string \<Rightarrow> arexp" where @@ -2060,6 +1925,14 @@ apply(auto) using contains0 by blast +lemma test1: + shows "AALT [] (ACHAR [Z] c) (ACHAR [S] c) >> [S]" + by (metis contains.intros(2) contains.intros(4) contains.intros(5) self_append_conv2) + +lemma test1a: + shows "bsimp (AALT [] (ACHAR [Z] c) (ACHAR [S] c)) = AALT [] (ACHAR [Z] c) (ACHAR [S] c)" + apply(simp) + done lemma q3a: assumes "\<exists>r \<in> set rs. bnullable r" @@ -3039,6 +2912,14 @@ shows "bsimp r >> bs \<longleftrightarrow> r >> bs" using contains55 contains55a by blast + +definition "SET a \<equiv> {bs . a >> bs}" + +lemma "SET(bsimp a) \<subseteq> SET(a)" + unfolding SET_def + apply(auto simp add: PPP1_eq) + done + lemma retrieve_code_bder: assumes "\<Turnstile> v : der c r" shows "code (injval r c v) = retrieve (bder c (intern r)) v" @@ -3078,8 +2959,15 @@ +lemma PPP: + assumes "\<Turnstile> v : r" + shows "intern r >> (retrieve (intern r) v)" + using assms + using contains5 by blast + + @@ -3963,36 +3851,20 @@ (* HERE *) lemma PX: - assumes "s \<in> L r" - shows "bders (intern r) s >> code (PX r s) \<Longrightarrow> bders (bsimp (intern r)) s >> code (PX r s)" - - using FC_def contains7b - using assms by me tis - - - - - apply(simp) - (*using FC_bders_iff2[of _ _ "[b]", simplified]*) - apply(subst (asm) FC_bders_iff2[of _ _ "[b]", simplified]) - apply(simp) - apply(subst (asm) FC_bder_iff) + assumes "s \<in> L r" "bders (intern r) s >> code (PX r s)" + shows "bders (bsimp (intern r)) s >> code (PX r s)" + using assms + apply(induct s arbitrary: r rule: rev_induct) apply(simp) - apply(drule bder_simp_contains) - using FC_bder_iff FC_bders_iff2 FC_bders_iff - apply(subst (asm) FC_bder_iff[symmetric]) - apply(subst FC_bder_iff) - using FC_bder_iff - - - apply (simp add: bder_simp_contains) - -lemma bder_simp_contains_IFF2: - assumes "bders r s >> bs" - shows "" - using assms - apply(induct s arbitrary: r bs rule: rev_induct) - apply(simp) + apply (simp add: PPP1_eq) + apply (simp add: bders_append bders_simp_append) + thm PX_bder_iff PX_bders_iff + apply(subst (asm) PX_bder_iff) + apply(assumption) + apply(subst (asm) (2) PX_bders_iff) + find_theorems "_ >> code (PX _ _)" + find_theorems "PX _ _ = _" + find_theorems "(intern _) >> _" apply (simp add: contains55) apply (simp add: bders_append bders_simp_append) apply (simp add: PPP1_eq)
--- a/thys/PDerivs.thy Wed Sep 18 16:35:57 2019 +0100 +++ b/thys/PDerivs.thy Sat Oct 24 12:13:39 2020 +0100 @@ -128,7 +128,7 @@ lemma pders_Set_CHAR [simp]: shows "pders_Set UNIV1 (CHAR c) = {ONE}" -unfolding UNIV1_def pders_Set_def +unfolding UNIV1_def pders_Set_def apply(auto) apply(frule rev_subsetD) apply(rule pders_CHAR)
--- a/thys/RegLangs.thy Wed Sep 18 16:35:57 2019 +0100 +++ b/thys/RegLangs.thy Sat Oct 24 12:13:39 2020 +0100 @@ -20,8 +20,22 @@ lemma Sequ_empty [simp]: shows "A ;; {} = {}" and "{} ;; A = {}" -by (simp_all add: Sequ_def) + by (simp_all add: Sequ_def) + +lemma + shows "(A \<union> B) ;; C = (A ;; C) \<union> (B ;; C)" + apply(auto simp add: Sequ_def) + done +lemma + shows "(A \<inter> B) ;; C \<subseteq> (A ;; C) \<inter> (B ;; C)" + apply(auto simp add: Sequ_def) + done + +lemma + shows "(A ;; C) \<inter> (B ;; C) \<subseteq> (A \<inter> B) ;; C" + apply(auto simp add: Sequ_def) + oops section {* Semantic Derivative (Left Quotient) of Languages *}
--- a/thys/Spec.thy Wed Sep 18 16:35:57 2019 +0100 +++ b/thys/Spec.thy Sat Oct 24 12:13:39 2020 +0100 @@ -4,6 +4,8 @@ begin + + section {* "Plain" Values *} datatype val =
--- a/thys/SpecExt.thy Wed Sep 18 16:35:57 2019 +0100 +++ b/thys/SpecExt.thy Sat Oct 24 12:13:39 2020 +0100 @@ -163,7 +163,11 @@ using assms apply(induct n arbitrary: s) apply(auto simp add: Sequ_def) -done + done + +lemma + assumes "[] \<in> A" "n \<noteq> 0" "A \<noteq> {}" + shows "A \<up> (Suc n) = A \<up> n" lemma Der_Pow_0: shows "Der c (A \<up> 0) = {}" @@ -225,6 +229,7 @@ | NTIMES rexp nat | FROMNTIMES rexp nat | NMTIMES rexp nat nat +| NOT rexp section {* Semantics of Regular Expressions *} @@ -241,6 +246,7 @@ | "L (NTIMES r n) = (L r) \<up> n" | "L (FROMNTIMES r n) = (\<Union>i\<in>{n..} . (L r) \<up> i)" | "L (NMTIMES r n m) = (\<Union>i\<in>{n..m} . (L r) \<up> i)" +| "L (NOT r) = ((UNIV:: string set) - L r)" section {* Nullable, Derivatives *} @@ -257,6 +263,7 @@ | "nullable (NTIMES r n) = (if n = 0 then True else nullable r)" | "nullable (FROMNTIMES r n) = (if n = 0 then True else nullable r)" | "nullable (NMTIMES r n m) = (if m < n then False else (if n = 0 then True else nullable r))" +| "nullable (NOT r) = (\<not> nullable r)" fun der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" @@ -281,6 +288,7 @@ else (if n = 0 then (if m = 0 then ZERO else SEQ (der c r) (UPNTIMES r (m - 1))) else SEQ (der c r) (NMTIMES r (n - 1) (m - 1))))" +| "der c (NOT r) = NOT (der c r)" lemma "L(der c (UPNTIMES r m)) = @@ -295,6 +303,26 @@ apply blast oops + + +lemma + assumes "der c r = ONE \<or> der c r = ZERO" + shows "L (der c (NOT r)) \<noteq> L(if (der c r = ZERO) then ONE else + if (der c r = ONE) then ZERO + else NOT(der c r))" + using assms + apply(simp) + apply(auto) + done + +lemma + "L (der c (NOT r)) = L(if (der c r = ZERO) then ONE else + if (der c r = ONE) then ZERO + else NOT(der c r))" + apply(simp) + apply(auto) + oops + lemma pow_add: assumes "s1 \<in> A \<up> n" "s2 \<in> A \<up> m" shows "s1 @ s2 \<in> A \<up> (n + m)" @@ -344,6 +372,16 @@ apply(auto simp add: Sequ_def) using SpecExt.Pow_empty by blast +abbreviation "FROM \<equiv> FROMNTIMES" + +lemma + shows "L (der c (FROM r n)) = + L (if n <= 0 then SEQ (der c r) (ALT ONE (FROM r 0)) + else SEQ (der c r) (ALT ZERO (FROM r (n -1))))" + apply(auto simp add: Sequ_def) + oops + + fun ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp" where @@ -365,7 +403,15 @@ apply(simp add: nullable_correctness del: Der_UNION) apply(simp add: nullable_correctness del: Der_UNION) apply(simp add: nullable_correctness del: Der_UNION) -prefer 2 + prefer 2 + apply(simp only: der.simps) + apply(case_tac "x2 = 0") + apply(simp) + apply(simp del: Der_Sequ L.simps) + apply(subst L.simps) + apply(subst (2) L.simps) + thm Der_UNION + apply(simp add: nullable_correctness del: Der_UNION) apply(simp add: nullable_correctness del: Der_UNION) apply(rule impI)