# HG changeset patch # User Christian Urban # Date 1614293218 0 # Node ID 8bb064045b4e3a9f424445c4a83f46e910eb3f32 # Parent e752d84225ec7b818ac3f07d57b785ebcc4ff617 updated diff -r e752d84225ec -r 8bb064045b4e Literature/huet-zipper.pdf Binary file Literature/huet-zipper.pdf has changed diff -r e752d84225ec -r 8bb064045b4e Literature/ziper-ders.pdf Binary file Literature/ziper-ders.pdf has changed diff -r e752d84225ec -r 8bb064045b4e progs/scala/re-annotated.sc --- a/progs/scala/re-annotated.sc Mon Feb 22 03:22:26 2021 +0000 +++ b/progs/scala/re-annotated.sc Thu Feb 25 22:46:58 2021 +0000 @@ -50,6 +50,41 @@ case class Right(v: Val) extends Val case class Stars(vs: List[Val]) extends Val case class Recd(x: String, v: Val) extends Val + +abstract class BTree +case object LZ extends BTree +case class L(bs: Bits) extends BTree +case class LALTS(bs: Bits, bts: List[BTree]) extends BTree +case class LSEQ(bs: Bits, bt1: BTree, bt2: BTree) extends BTree +case class LSTAR(bs: Bits, bt: BTree) extends BTree + +def ext(r: ARexp): BTree = r match { + case AZERO => LZ + case AONE(bs) => L(bs) + case ACHAR(bs, _) => L(bs) + case AALTS(bs, rs) => LALTS(bs, rs.map(ext)) + case ASEQ(bs, r1, r2) => LSEQ(bs, ext(r1), ext(r2)) + case ASTAR(bs, r) => LSTAR(bs, ext(r)) +} + +// annotated regular expressions +abstract class BRexp +case object BZERO extends BRexp +case object BONE extends BRexp +case class BCHAR(c: Char) extends BRexp +case class BALTS(rs: List[BRexp]) extends BRexp +case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp +case class BSTAR(r: BRexp) extends BRexp + +def ex(r: ARexp): BRexp = r match { + case AZERO => BZERO + case AONE(_) => BONE + case ACHAR(_, c) => BCHAR(c) + case AALTS(_, rs) => BALTS(rs.map(ex)) + case ASEQ(_, r1, r2) => BSEQ(ex(r1), ex(r2)) + case ASTAR(_, r) => BSTAR(ex(r)) +} + // some convenience for typing in regular expressions def charlist2rexp(s: List[Char]): Rexp = s match { @@ -234,7 +269,6 @@ // example by Tudor //val reg = (STAR("a") ~ ("b" | "c")).% - //println(blexing(reg, "aab")) diff -r e752d84225ec -r 8bb064045b4e progs/scala/re-annotated2.sc --- a/progs/scala/re-annotated2.sc Mon Feb 22 03:22:26 2021 +0000 +++ b/progs/scala/re-annotated2.sc Thu Feb 25 22:46:58 2021 +0000 @@ -51,15 +51,7 @@ // an abbreviation for binary alternatives def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Recd(x: String, v: Val) extends Val - + // some convenience for typing in regular expressions def charlist2rexp(s: List[Char]): Rexp = s match { case Nil => ONE @@ -83,17 +75,6 @@ def $ (r: Rexp) = RECD(s, r) } -def size(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case ALT(r1, r2) => 1 + size(r1) + size(r2) - case SEQ(r1, r2) => 1 + size(r1) + size(r2) - case STAR(r) => 1 + size(r) - case RECD(_, r) => 1 + size(r) - case CHARSET(_) => 1 -} - - // Bitcoded + Annotation //======================= @@ -134,43 +115,8 @@ // internalise(("a" | "ab") ~ ("b" | "")) -// decoding of a value from a bitsequence -// (this is not tail-recursive and therefore a potential bottleneck) -def vdecode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { - case (ONE, bs) => (Empty, bs) - case (ALT(r1, r2), Z::bs) => { - val (v, bs1) = vdecode_aux(r1, bs) - (Left(v), bs1) - } - case (ALT(r1, r2), S::bs) => { - val (v, bs1) = vdecode_aux(r2, bs) - (Right(v), bs1) - } - case (SEQ(r1, r2), bs) => { - val (v1, bs1) = vdecode_aux(r1, bs) - val (v2, bs2) = vdecode_aux(r2, bs1) - (Sequ(v1, v2), bs2) - } - case (STAR(r1), Z::bs) => { - val (v, bs1) = vdecode_aux(r1, bs) - val (Stars(vs), bs2) = vdecode_aux(STAR(r1), bs1) - (Stars(v::vs), bs2) - } - case (STAR(_), S::bs) => (Stars(Nil), bs) - case (RECD(s, r1), bs) => - val (v, bs1) = vdecode_aux(r1, bs) - (Recd(s, v), bs1) - case (CHARSET(_), C(c)::bs) => (Chr(c), bs) -} - -def vdecode(r: Rexp, bs: Bits) = vdecode_aux(r, bs) match { - case (v, Nil) => v - case _ => throw new Exception("Not decodable") -} - // decoding of sequence of string tokens from a bitsequence -// tail-recursive version using an accumulator (alternative for -// vdecode) +// tail-recursive version using an accumulator @tailrec def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match { case (Nil, _) => acc @@ -226,7 +172,7 @@ // derivative w.r.t. a string (iterates bder) @tailrec -def bders (s: List[Char], r: ARexp) : ARexp = s match { +def bders(s: List[Char], r: ARexp) : ARexp = s match { case Nil => r case c::s => bders(s, bder(c, r)) } @@ -238,8 +184,8 @@ } // calls blex and decodes the value -def blexing(r: Rexp, s: String) : Val = - vdecode(r, blex(internalise(r), s.toList)) +def blexing(r: Rexp, s: String) = + sdecode(r, blex(internalise(r), s.toList)) // example by Tudor @@ -283,45 +229,30 @@ case c::cs => blex_simp(bsimp(bder(c, r)), cs) } -// blexing_simp decodes a value from the bitsequence (potentially slow) -// blexing2_simp decodes a string-list from the bitsequence -def blexing_simp(r: Rexp, s: String) : Val = - vdecode(r, blex_simp(internalise(r), s.toList)) - -def blexing2_simp(r: Rexp, s: String) : List[String] = +// blexing_simp decodes a string-list from the bitsequence +def blexing_simp(r: Rexp, s: String) : List[String] = sdecode(r, blex_simp(internalise(r), s.toList)) //println(blexing_simp(reg, "aab")) -// extracts a string from value -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString + +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHARSET(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case RECD(_, r) => 1 + size(r) } -// extracts an environment from a value -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Sequ(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Recd(x, v) => (x, flatten(v))::env(v) -} - -def bsize(a: ARexp) = size(erase(a)) +def bsize(r: ARexp) = size(erase(r)) // Some Tests //============ - def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() for (j <- 1 to i) code @@ -329,30 +260,50 @@ (end - start)/(i * 1.0e9) } +val ones = SEQ(SEQ(CHAR('/'), CHAR('*')), SEQ(STAR(CHAR('1')), SEQ(CHAR('*'), CHAR('/')))) +println("sizes unsimplified") +println(bsize(bders("/*".toList, internalise(ones)))) // 12 +println(bsize(bders("/*1".toList, internalise(ones)))) // 25 +println(bsize(bders("/*11".toList, internalise(ones)))) // 34 +println(bsize(bders("/*111".toList, internalise(ones)))) // 43 +println(bsize(bders("/*1111".toList, internalise(ones)))) // 52 +println("sizes simplified") +println(bsize(bsimp(bders("/*".toList, internalise(ones))))) // 6 +println(bsize(bsimp(bders("/*1".toList, internalise(ones))))) // 6 +println(bsize(bsimp(bders("/*11".toList, internalise(ones))))) // 6 +println(bsize(bsimp(bders("/*111".toList, internalise(ones))))) // 6 +println(bsize(bsimp(bders("/*1111".toList, internalise(ones))))) // 6 + +println("ones:") +for(i <- 0 to 10000 by 1000) { + println(s"$i: ${time_needed(1, blexing_simp(ones, "/*" ++ "1" * i ++ "*/"))}") +} + + + + +System.exit(1) + val evil1 = STAR(STAR("a")) ~ "b" val evil2 = STAR(STAR(STAR("a"))) ~ "b" val evil3 = STAR("aa" | "a") -/* println("evil1") for(i <- 0 to 10000 by 1000) { - println(time_needed(1, blexing2_simp(evil1, "a"*i ++ "b"))) + println(time_needed(1, blexing_simp(evil1, "a" * i ++ "b"))) } -*/ -/* + println("evil2") for(i <- 0 to 10000 by 1000) { - println(time_needed(1, blexing2_simp(evil2, "a"*i ++ "b"))) + println(time_needed(1, blexing_simp(evil2, "a" * i ++ "b"))) } -*/ -/* + println("evil3") for(i <- 0 to 10000 by 1000) { - println(time_needed(1, blexing2_simp(evil3, "a"*i))) + println(time_needed(1, blexing_simp(evil3, "a" * i))) } -*/ // WHILE LANGUAGE //================ @@ -384,16 +335,17 @@ // Some Simple While Tests //======================== +println("WHILE TESTS") + + val prog0 = """read n""" println(s"test: $prog0") -println(env(blexing_simp(WHILE_REGS, prog0))) -println(blexing2_simp(WHILE_REGS, prog0)) +println(blexing_simp(WHILE_REGS, prog0)) val prog1 = """read n; write n""" println(s"test: $prog1") -println(env(blexing_simp(WHILE_REGS, prog1))) -println(blexing2_simp(WHILE_REGS, prog1)) +println(blexing_simp(WHILE_REGS, prog1)) val prog2 = """ write "Fib"; @@ -411,8 +363,8 @@ """ println("lexing fib program (once)") -println(blexing2_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w"))) +println(blexing_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w"))) val n = 200 println(s"lexing fib program ($n times, size ${prog2.length * n})") -println(time_needed(1, blexing2_simp(WHILE_REGS, prog2 * n))) +println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n))) diff -r e752d84225ec -r 8bb064045b4e thys/RegLangs.thy --- a/thys/RegLangs.thy Mon Feb 22 03:22:26 2021 +0000 +++ b/thys/RegLangs.thy Thu Feb 25 22:46:58 2021 +0000 @@ -1,16 +1,15 @@ - theory RegLangs imports Main "~~/src/HOL/Library/Sublist" begin -section {* Sequential Composition of Languages *} +section \Sequential Composition of Languages\ definition Sequ :: "string set \ string set \ string set" ("_ ;; _" [100,100] 100) where "A ;; B = {s1 @ s2 | s1 s2. s1 \ A \ s2 \ B}" -text {* Two Simple Properties about Sequential Composition *} +text \Two Simple Properties about Sequential Composition\ lemma Sequ_empty_string [simp]: shows "A ;; {[]} = A" @@ -22,22 +21,8 @@ and "{} ;; A = {}" by (simp_all add: Sequ_def) -lemma - shows "(A \ B) ;; C = (A ;; C) \ (B ;; C)" - apply(auto simp add: Sequ_def) - done -lemma - shows "(A \ B) ;; C \ (A ;; C) \ (B ;; C)" - apply(auto simp add: Sequ_def) - done - -lemma - shows "(A ;; C) \ (B ;; C) \ (A \ B) ;; C" - apply(auto simp add: Sequ_def) - oops - -section {* Semantic Derivative (Left Quotient) of Languages *} +section \Semantic Derivative (Left Quotient) of Languages\ definition Der :: "char \ string set \ string set" @@ -75,7 +60,7 @@ by (auto simp add: Cons_eq_append_conv) -section {* Kleene Star for Languages *} +section \Kleene Star for Languages\ inductive_set Star :: "string set \ string set" ("_\" [101] 102) @@ -104,7 +89,7 @@ by(auto simp add: Star_decomp) -lemma Der_star [simp]: +lemma Der_star[simp]: shows "Der c (A\) = (Der c A) ;; A\" proof - have "Der c (A\) = Der c ({[]} \ A ;; A\)" @@ -134,37 +119,37 @@ -section {* Regular Expressions *} +section \Regular Expressions\ datatype rexp = ZERO | ONE -| CHAR char +| CH char | SEQ rexp rexp | ALT rexp rexp | STAR rexp -section {* Semantics of Regular Expressions *} +section \Semantics of Regular Expressions\ fun L :: "rexp \ string set" where "L (ZERO) = {}" | "L (ONE) = {[]}" -| "L (CHAR c) = {[c]}" +| "L (CH c) = {[c]}" | "L (SEQ r1 r2) = (L r1) ;; (L r2)" | "L (ALT r1 r2) = (L r1) \ (L r2)" | "L (STAR r) = (L r)\" -section {* Nullable, Derivatives *} +section \Nullable, Derivatives\ fun nullable :: "rexp \ bool" where "nullable (ZERO) = False" | "nullable (ONE) = True" -| "nullable (CHAR c) = False" +| "nullable (CH c) = False" | "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" | "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" | "nullable (STAR r) = True" @@ -175,7 +160,7 @@ where "der c (ZERO) = ZERO" | "der c (ONE) = ZERO" -| "der c (CHAR d) = (if c = d then ONE else ZERO)" +| "der c (CH d) = (if c = d then ONE else ZERO)" | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" | "der c (SEQ r1 r2) = (if nullable r1 diff -r e752d84225ec -r 8bb064045b4e thys/Spec.thy --- a/thys/Spec.thy Mon Feb 22 03:22:26 2021 +0000 +++ b/thys/Spec.thy Thu Feb 25 22:46:58 2021 +0000 @@ -3,10 +3,7 @@ imports RegLangs begin - - - -section {* "Plain" Values *} +section \"Plain" Values\ datatype val = Void @@ -17,7 +14,7 @@ | Stars "val list" -section {* The string behind a value *} +section \The string behind a value\ fun flat :: "val \ string" @@ -38,7 +35,7 @@ by (induct vs) (auto) -section {* Lexical Values *} +section \Lexical Values\ inductive Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) @@ -47,7 +44,7 @@ | "\ v1 : r1 \ \ Left v1 : ALT r1 r2" | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" -| "\ Char c : CHAR c" +| "\ Char c : CH c" | "\v \ set vs. \ v : r \ flat v \ [] \ \ Stars vs : STAR r" inductive_cases Prf_elims: @@ -55,7 +52,7 @@ "\ v : SEQ r1 r2" "\ v : ALT r1 r2" "\ v : ONE" - "\ v : CHAR c" + "\ v : CH c" "\ vs : STAR r" lemma Prf_Stars_appendE: @@ -118,11 +115,11 @@ -section {* Sets of Lexical Values *} +section \Sets of Lexical Values\ -text {* +text \ Shows that lexical values are finite for a given regex and string. -*} +\ definition LV :: "rexp \ string \ val set" @@ -131,7 +128,7 @@ lemma LV_simps: shows "LV ZERO s = {}" and "LV ONE s = (if s = [] then {Void} else {})" - and "LV (CHAR c) s = (if s = [c] then {Char c} else {})" + and "LV (CH c) s = (if s = [c] then {Char c} else {})" and "LV (ALT r1 r2) s = Left ` LV r1 s \ Right ` LV r2 s" unfolding LV_def by (auto intro: Prf.intros elim: Prf.cases) @@ -229,8 +226,8 @@ case (ONE s) show "finite (LV ONE s)" by (simp add: LV_simps) next - case (CHAR c s) - show "finite (LV (CHAR c) s)" by (simp add: LV_simps) + case (CH c s) + show "finite (LV (CH c) s)" by (simp add: LV_simps) next case (ALT r1 r2 s) then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps) @@ -258,13 +255,13 @@ -section {* Our inductive POSIX Definition *} +section \Our inductive POSIX Definition\ inductive Posix :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) where Posix_ONE: "[] \ ONE \ Void" -| Posix_CHAR: "[c] \ (CHAR c) \ (Char c)" +| Posix_CH: "[c] \ (CH c) \ (Char c)" | Posix_ALT1: "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" | Posix_ALT2: "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" | Posix_SEQ: "\s1 \ r1 \ v1; s2 \ r2 \ v2; @@ -278,7 +275,7 @@ inductive_cases Posix_elims: "s \ ZERO \ v" "s \ ONE \ v" - "s \ CHAR c \ v" + "s \ CH c \ v" "s \ ALT r1 r2 \ v" "s \ SEQ r1 r2 \ v" "s \ STAR r \ v" @@ -287,13 +284,13 @@ assumes "s \ r \ v" shows "s \ L r" "flat v = s" using assms -by (induct s r v rule: Posix.induct) - (auto simp add: Sequ_def) + by(induct s r v rule: Posix.induct) + (auto simp add: Sequ_def) -text {* +text \ For a give value and string, our Posix definition determines a unique value. -*} +\ lemma Posix_determ: assumes "s \ r \ v1" "s \ r \ v2" @@ -304,8 +301,8 @@ have "[] \ ONE \ v2" by fact then show "Void = v2" by cases auto next - case (Posix_CHAR c v2) - have "[c] \ CHAR c \ v2" by fact + case (Posix_CH c v2) + have "[c] \ CH c \ v2" by fact then show "Char c = v2" by cases auto next case (Posix_ALT1 s r1 v r2 v2) @@ -362,9 +359,9 @@ qed -text {* +text \ Our POSIX values are lexical values. -*} +\ lemma Posix_LV: assumes "s \ r \ v"