lexing: changeset 361:8bb064045b4e

Binary file Literature/huet-zipper.pdf has changed

Binary file Literature/ziper-ders.pdf has changed

--- a/progs/scala/re-annotated.sc	Mon Feb 22 03:22:26 2021 +0000
+++ b/progs/scala/re-annotated.sc	Thu Feb 25 22:46:58 2021 +0000
@@ -50,6 +50,41 @@
 case class Right(v: Val) extends Val
 case class Stars(vs: List[Val]) extends Val
 case class Recd(x: String, v: Val) extends Val
+
+abstract class BTree
+case object LZ extends BTree
+case class L(bs: Bits) extends BTree
+case class LALTS(bs: Bits, bts: List[BTree]) extends BTree 
+case class LSEQ(bs: Bits, bt1: BTree, bt2: BTree) extends BTree 
+case class LSTAR(bs: Bits, bt: BTree) extends BTree
+
+def ext(r: ARexp): BTree = r match {
+  case AZERO => LZ
+  case AONE(bs) => L(bs)
+  case ACHAR(bs, _) => L(bs)
+  case AALTS(bs, rs) => LALTS(bs, rs.map(ext))
+  case ASEQ(bs, r1, r2) => LSEQ(bs, ext(r1), ext(r2))
+  case ASTAR(bs, r) => LSTAR(bs, ext(r))
+}
+
+// annotated regular expressions
+abstract class BRexp 
+case object BZERO extends BRexp
+case object BONE extends BRexp
+case class BCHAR(c: Char) extends BRexp
+case class BALTS(rs: List[BRexp]) extends BRexp 
+case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp 
+case class BSTAR(r: BRexp) extends BRexp 
+
+def ex(r: ARexp): BRexp = r match {
+  case AZERO => BZERO
+  case AONE(_) => BONE
+  case ACHAR(_, c) => BCHAR(c)
+  case AALTS(_, rs) => BALTS(rs.map(ex))
+  case ASEQ(_, r1, r2) => BSEQ(ex(r1), ex(r2))
+  case ASTAR(_, r) => BSTAR(ex(r))
+}
+
    
 // some convenience for typing in regular expressions
 def charlist2rexp(s: List[Char]): Rexp = s match {
@@ -234,7 +269,6 @@
 
 // example by Tudor
 //val reg = (STAR("a") ~ ("b" | "c")).%
-
 //println(blexing(reg, "aab"))

--- a/progs/scala/re-annotated2.sc	Mon Feb 22 03:22:26 2021 +0000
+++ b/progs/scala/re-annotated2.sc	Thu Feb 25 22:46:58 2021 +0000
@@ -51,15 +51,7 @@
 // an abbreviation for binary alternatives
 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
 
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Recd(x: String, v: Val) extends Val
-   
+
 // some convenience for typing in regular expressions
 def charlist2rexp(s: List[Char]): Rexp = s match {
   case Nil => ONE
@@ -83,17 +75,6 @@
   def $ (r: Rexp) = RECD(s, r)
 }
 
-def size(r: Rexp) : Int = r match {
-  case ZERO => 1
-  case ONE => 1
-  case ALT(r1, r2) => 1 + size(r1) + size(r2)
-  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
-  case STAR(r) => 1 + size(r)
-  case RECD(_, r) => 1 + size(r)
-  case CHARSET(_) => 1
-}
-
-
 
 // Bitcoded + Annotation
 //=======================
@@ -134,43 +115,8 @@
 // internalise(("a" | "ab") ~ ("b" | ""))
 
 
-// decoding of a value from a bitsequence
-// (this is not tail-recursive and therefore a potential bottleneck)
-def vdecode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
-  case (ONE, bs) => (Empty, bs)
-  case (ALT(r1, r2), Z::bs) => {
-    val (v, bs1) = vdecode_aux(r1, bs)
-    (Left(v), bs1)
-  }
-  case (ALT(r1, r2), S::bs) => {
-    val (v, bs1) = vdecode_aux(r2, bs)
-    (Right(v), bs1)
-  }
-  case (SEQ(r1, r2), bs) => {
-    val (v1, bs1) = vdecode_aux(r1, bs)
-    val (v2, bs2) = vdecode_aux(r2, bs1)
-    (Sequ(v1, v2), bs2)
-  }
-  case (STAR(r1), Z::bs) => {
-    val (v, bs1) = vdecode_aux(r1, bs)
-    val (Stars(vs), bs2) = vdecode_aux(STAR(r1), bs1)
-    (Stars(v::vs), bs2)
-  }
-  case (STAR(_), S::bs) => (Stars(Nil), bs)
-  case (RECD(s, r1), bs) => 
-    val (v, bs1) = vdecode_aux(r1, bs)
-    (Recd(s, v), bs1)
-  case (CHARSET(_), C(c)::bs) => (Chr(c), bs)
-}
-
-def vdecode(r: Rexp, bs: Bits) = vdecode_aux(r, bs) match {
-  case (v, Nil) => v
-  case _ => throw new Exception("Not decodable")
-}
-
 // decoding of sequence of string tokens from a bitsequence
-// tail-recursive version using an accumulator (alternative for 
-// vdecode)
+// tail-recursive version using an accumulator
 @tailrec
 def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match {
   case (Nil, _) => acc
@@ -226,7 +172,7 @@
 
 // derivative w.r.t. a string (iterates bder)
 @tailrec
-def bders (s: List[Char], r: ARexp) : ARexp = s match {
+def bders(s: List[Char], r: ARexp) : ARexp = s match {
   case Nil => r
   case c::s => bders(s, bder(c, r))
 }
@@ -238,8 +184,8 @@
 }
 
 // calls blex and decodes the value
-def blexing(r: Rexp, s: String) : Val = 
-  vdecode(r, blex(internalise(r), s.toList))
+def blexing(r: Rexp, s: String) = 
+  sdecode(r, blex(internalise(r), s.toList))
 
 
 // example by Tudor
@@ -283,45 +229,30 @@
   case c::cs => blex_simp(bsimp(bder(c, r)), cs)
 }
 
-// blexing_simp decodes a value from the bitsequence (potentially slow)
-// blexing2_simp decodes a string-list from the bitsequence
-def blexing_simp(r: Rexp, s: String) : Val = 
-  vdecode(r, blex_simp(internalise(r), s.toList))
-
-def blexing2_simp(r: Rexp, s: String) : List[String] = 
+// blexing_simp decodes a string-list from the bitsequence
+def blexing_simp(r: Rexp, s: String) : List[String] = 
   sdecode(r, blex_simp(internalise(r), s.toList))
 
 
 //println(blexing_simp(reg, "aab"))
 
 
-// extracts a string from value
-def flatten(v: Val) : String = v match {
-  case Empty => ""
-  case Chr(c) => c.toString
-  case Left(v) => flatten(v)
-  case Right(v) => flatten(v)
-  case Sequ(v1, v2) => flatten(v1) + flatten(v2)
-  case Stars(vs) => vs.map(flatten).mkString
+
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHARSET(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r) => 1 + size(r)
+  case RECD(_, r) => 1 + size(r)
 }
 
-// extracts an environment from a value
-def env(v: Val) : List[(String, String)] = v match {
-  case Empty => Nil
-  case Chr(c) => Nil
-  case Left(v) => env(v)
-  case Right(v) => env(v)
-  case Sequ(v1, v2) => env(v1) ::: env(v2)
-  case Stars(vs) => vs.flatMap(env)
-  case Recd(x, v) => (x, flatten(v))::env(v)
-}
-
-def bsize(a: ARexp) = size(erase(a))
+def bsize(r: ARexp) = size(erase(r))
 
 // Some Tests
 //============
 
-
 def time_needed[T](i: Int, code: => T) = {
   val start = System.nanoTime()
   for (j <- 1 to i) code
@@ -329,30 +260,50 @@
   (end - start)/(i * 1.0e9)
 }
 
+val ones = SEQ(SEQ(CHAR('/'), CHAR('*')), SEQ(STAR(CHAR('1')), SEQ(CHAR('*'), CHAR('/'))))
+println("sizes unsimplified")
+println(bsize(bders("/*".toList, internalise(ones))))     // 12
+println(bsize(bders("/*1".toList, internalise(ones))))    // 25
+println(bsize(bders("/*11".toList, internalise(ones))))   // 34
+println(bsize(bders("/*111".toList, internalise(ones))))  // 43
+println(bsize(bders("/*1111".toList, internalise(ones)))) // 52
+println("sizes simplified")
+println(bsize(bsimp(bders("/*".toList, internalise(ones)))))     // 6
+println(bsize(bsimp(bders("/*1".toList, internalise(ones)))))    // 6
+println(bsize(bsimp(bders("/*11".toList, internalise(ones)))))   // 6
+println(bsize(bsimp(bders("/*111".toList, internalise(ones)))))  // 6
+println(bsize(bsimp(bders("/*1111".toList, internalise(ones))))) // 6
+
+println("ones:")
+for(i <- 0 to 10000 by 1000) {
+    println(s"$i: ${time_needed(1, blexing_simp(ones, "/*" ++ "1" * i ++ "*/"))}")
+}
+
+
+
+
+System.exit(1)
+
 val evil1 = STAR(STAR("a")) ~ "b"
 val evil2 = STAR(STAR(STAR("a"))) ~ "b"
 val evil3 = STAR("aa" | "a")
 
-/*
 println("evil1")
 for(i <- 0 to 10000 by 1000) {
-    println(time_needed(1, blexing2_simp(evil1, "a"*i ++ "b")))
+    println(time_needed(1, blexing_simp(evil1, "a" * i ++ "b")))
 }
-*/
 
-/*
+
 println("evil2")
 for(i <- 0 to 10000 by 1000) {
-    println(time_needed(1, blexing2_simp(evil2, "a"*i ++ "b")))
+    println(time_needed(1, blexing_simp(evil2, "a" * i ++ "b")))
 }
-*/
 
-/*
+
 println("evil3")
 for(i <- 0 to 10000 by 1000) {
-    println(time_needed(1, blexing2_simp(evil3, "a"*i)))
+    println(time_needed(1, blexing_simp(evil3, "a" * i)))
 }
-*/
 
 // WHILE LANGUAGE
 //================
@@ -384,16 +335,17 @@
 
 // Some Simple While Tests
 //========================
+println("WHILE TESTS")
+
+
 
 val prog0 = """read n"""
 println(s"test: $prog0")
-println(env(blexing_simp(WHILE_REGS, prog0)))
-println(blexing2_simp(WHILE_REGS, prog0))
+println(blexing_simp(WHILE_REGS, prog0))
 
 val prog1 = """read  n; write n"""  
 println(s"test: $prog1")
-println(env(blexing_simp(WHILE_REGS, prog1)))
-println(blexing2_simp(WHILE_REGS, prog1))
+println(blexing_simp(WHILE_REGS, prog1))
 
 val prog2 = """
 write "Fib";
@@ -411,8 +363,8 @@
 """
 
 println("lexing fib program (once)")
-println(blexing2_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w")))
+println(blexing_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w")))
 
 val n = 200
 println(s"lexing fib program ($n times, size ${prog2.length * n})")
-println(time_needed(1, blexing2_simp(WHILE_REGS, prog2 * n)))
+println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n)))

--- a/thys/RegLangs.thy	Mon Feb 22 03:22:26 2021 +0000
+++ b/thys/RegLangs.thy	Thu Feb 25 22:46:58 2021 +0000
@@ -1,16 +1,15 @@
-   
 theory RegLangs
   imports Main "~~/src/HOL/Library/Sublist"
 begin
 
-section {* Sequential Composition of Languages *}
+section \<open>Sequential Composition of Languages\<close>
 
 definition
   Sequ :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
 where 
   "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
 
-text {* Two Simple Properties about Sequential Composition *}
+text \<open>Two Simple Properties about Sequential Composition\<close>
 
 lemma Sequ_empty_string [simp]:
   shows "A ;; {[]} = A"
@@ -22,22 +21,8 @@
   and   "{} ;; A = {}"
   by (simp_all add: Sequ_def)
 
-lemma
-  shows "(A \<union> B) ;; C = (A ;; C) \<union> (B ;; C)"
-  apply(auto simp add: Sequ_def)  
-  done
 
-lemma
-  shows "(A \<inter> B) ;; C \<subseteq> (A ;; C) \<inter> (B ;; C)"
-  apply(auto simp add: Sequ_def) 
-  done
-
-lemma
-  shows "(A ;; C) \<inter> (B ;; C) \<subseteq> (A \<inter> B) ;; C"
-  apply(auto simp add: Sequ_def) 
-  oops
-
-section {* Semantic Derivative (Left Quotient) of Languages *}
+section \<open>Semantic Derivative (Left Quotient) of Languages\<close>
 
 definition
   Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
@@ -75,7 +60,7 @@
 by (auto simp add: Cons_eq_append_conv)
 
 
-section {* Kleene Star for Languages *}
+section \<open>Kleene Star for Languages\<close>
 
 inductive_set
   Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
@@ -104,7 +89,7 @@
 by(auto simp add: Star_decomp)
 
 
-lemma Der_star [simp]:
+lemma Der_star[simp]:
   shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
 proof -    
   have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"  
@@ -134,37 +119,37 @@
 
 
 
-section {* Regular Expressions *}
+section \<open>Regular Expressions\<close>
 
 datatype rexp =
   ZERO
 | ONE
-| CHAR char
+| CH char
 | SEQ rexp rexp
 | ALT rexp rexp
 | STAR rexp
 
-section {* Semantics of Regular Expressions *}
+section \<open>Semantics of Regular Expressions\<close>
  
 fun
   L :: "rexp \<Rightarrow> string set"
 where
   "L (ZERO) = {}"
 | "L (ONE) = {[]}"
-| "L (CHAR c) = {[c]}"
+| "L (CH c) = {[c]}"
 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
 | "L (STAR r) = (L r)\<star>"
 
 
-section {* Nullable, Derivatives *}
+section \<open>Nullable, Derivatives\<close>
 
 fun
  nullable :: "rexp \<Rightarrow> bool"
 where
   "nullable (ZERO) = False"
 | "nullable (ONE) = True"
-| "nullable (CHAR c) = False"
+| "nullable (CH c) = False"
 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
 | "nullable (STAR r) = True"
@@ -175,7 +160,7 @@
 where
   "der c (ZERO) = ZERO"
 | "der c (ONE) = ZERO"
-| "der c (CHAR d) = (if c = d then ONE else ZERO)"
+| "der c (CH d) = (if c = d then ONE else ZERO)"
 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
 | "der c (SEQ r1 r2) = 
      (if nullable r1

--- a/thys/Spec.thy	Mon Feb 22 03:22:26 2021 +0000
+++ b/thys/Spec.thy	Thu Feb 25 22:46:58 2021 +0000
@@ -3,10 +3,7 @@
   imports RegLangs
 begin
 
-
-
-
-section {* "Plain" Values *}
+section \<open>"Plain" Values\<close>
 
 datatype val = 
   Void
@@ -17,7 +14,7 @@
 | Stars "val list"
 
 
-section {* The string behind a value *}
+section \<open>The string behind a value\<close>
 
 fun 
   flat :: "val \<Rightarrow> string"
@@ -38,7 +35,7 @@
 by (induct vs) (auto)
 
 
-section {* Lexical Values *}
+section \<open>Lexical Values\<close>
 
 inductive 
   Prf :: "val \<Rightarrow> rexp \<Rightarrow> bool" ("\<Turnstile> _ : _" [100, 100] 100)
@@ -47,7 +44,7 @@
 | "\<Turnstile> v1 : r1 \<Longrightarrow> \<Turnstile> Left v1 : ALT r1 r2"
 | "\<Turnstile> v2 : r2 \<Longrightarrow> \<Turnstile> Right v2 : ALT r1 r2"
 | "\<Turnstile> Void : ONE"
-| "\<Turnstile> Char c : CHAR c"
+| "\<Turnstile> Char c : CH c"
 | "\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> [] \<Longrightarrow> \<Turnstile> Stars vs : STAR r"
 
 inductive_cases Prf_elims:
@@ -55,7 +52,7 @@
   "\<Turnstile> v : SEQ r1 r2"
   "\<Turnstile> v : ALT r1 r2"
   "\<Turnstile> v : ONE"
-  "\<Turnstile> v : CHAR c"
+  "\<Turnstile> v : CH c"
   "\<Turnstile> vs : STAR r"
 
 lemma Prf_Stars_appendE:
@@ -118,11 +115,11 @@
 
 
 
-section {* Sets of Lexical Values *}
+section \<open>Sets of Lexical Values\<close>
 
-text {*
+text \<open>
   Shows that lexical values are finite for a given regex and string.
-*}
+\<close>
 
 definition
   LV :: "rexp \<Rightarrow> string \<Rightarrow> val set"
@@ -131,7 +128,7 @@
 lemma LV_simps:
   shows "LV ZERO s = {}"
   and   "LV ONE s = (if s = [] then {Void} else {})"
-  and   "LV (CHAR c) s = (if s = [c] then {Char c} else {})"
+  and   "LV (CH c) s = (if s = [c] then {Char c} else {})"
   and   "LV (ALT r1 r2) s = Left ` LV r1 s \<union> Right ` LV r2 s"
 unfolding LV_def
 by (auto intro: Prf.intros elim: Prf.cases)
@@ -229,8 +226,8 @@
   case (ONE s)
   show "finite (LV ONE s)" by (simp add: LV_simps)
 next
-  case (CHAR c s)
-  show "finite (LV (CHAR c) s)" by (simp add: LV_simps)
+  case (CH c s)
+  show "finite (LV (CH c) s)" by (simp add: LV_simps)
 next 
   case (ALT r1 r2 s)
   then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps)
@@ -258,13 +255,13 @@
 
 
 
-section {* Our inductive POSIX Definition *}
+section \<open>Our inductive POSIX Definition\<close>
 
 inductive 
   Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
 where
   Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
-| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
+| Posix_CH: "[c] \<in> (CH c) \<rightarrow> (Char c)"
 | Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
 | Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
 | Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
@@ -278,7 +275,7 @@
 inductive_cases Posix_elims:
   "s \<in> ZERO \<rightarrow> v"
   "s \<in> ONE \<rightarrow> v"
-  "s \<in> CHAR c \<rightarrow> v"
+  "s \<in> CH c \<rightarrow> v"
   "s \<in> ALT r1 r2 \<rightarrow> v"
   "s \<in> SEQ r1 r2 \<rightarrow> v"
   "s \<in> STAR r \<rightarrow> v"
@@ -287,13 +284,13 @@
   assumes "s \<in> r \<rightarrow> v"
   shows "s \<in> L r" "flat v = s"
 using assms
-by (induct s r v rule: Posix.induct)
-   (auto simp add: Sequ_def)
+  by(induct s r v rule: Posix.induct)
+    (auto simp add: Sequ_def)
 
-text {*
+text \<open>
   For a give value and string, our Posix definition 
   determines a unique value.
-*}
+\<close>
 
 lemma Posix_determ:
   assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
@@ -304,8 +301,8 @@
   have "[] \<in> ONE \<rightarrow> v2" by fact
   then show "Void = v2" by cases auto
 next 
-  case (Posix_CHAR c v2)
-  have "[c] \<in> CHAR c \<rightarrow> v2" by fact
+  case (Posix_CH c v2)
+  have "[c] \<in> CH c \<rightarrow> v2" by fact
   then show "Char c = v2" by cases auto
 next 
   case (Posix_ALT1 s r1 v r2 v2)
@@ -362,9 +359,9 @@
 qed
 
 
-text {*
+text \<open>
   Our POSIX values are lexical values.
-*}
+\<close>
 
 lemma Posix_LV:
   assumes "s \<in> r \<rightarrow> v"

author	Christian Urban <christian.urban@kcl.ac.uk>
	Thu, 25 Feb 2021 22:46:58 +0000
changeset 361	8bb064045b4e
parent 360	e752d84225ec
child 362	e51c9a67a68d

Literature/huet-zipper.pdf		file \| annotate \| diff \| comparison \| revisions
Literature/ziper-ders.pdf		file \| annotate \| diff \| comparison \| revisions
progs/scala/re-annotated.sc		file \| annotate \| diff \| comparison \| revisions
progs/scala/re-annotated2.sc		file \| annotate \| diff \| comparison \| revisions
thys/RegLangs.thy		file \| annotate \| diff \| comparison \| revisions
thys/Spec.thy		file \| annotate \| diff \| comparison \| revisions