--- a/Brexp.scala Thu May 07 11:36:15 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,185 +0,0 @@
-import RexpRelated._
-import RexpRelated.Rexp._
-import Spiral._
-import scala.collection.mutable.ArrayBuffer
-abstract class BRexp
-case object BZERO extends BRexp
-case object BONE extends BRexp
-case class BCHAR(c: Char) extends BRexp
-case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp
-case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp
-case class BSTAR(r: BRexp) extends BRexp
-
-object BRexp{
- def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there
- //are no enclosing STAR or SEQ
- case ZERO => BZERO
- case ONE => BONE
- case CHAR(c) => BCHAR(c)
- case ALTS(rs) => BALTS(Z, rs.map(brternalise))
- case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) )
- case STAR(r) => BSTAR(brternalise(r))
- case RECD(x, r) => brternalise(r)
- }
- def brnullable (r: BRexp) : Boolean = r match {
- case BZERO => false
- case BONE => true
- case BCHAR(_) => false
- case BALTS(bs, rs) => rs.exists(brnullable)
- case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2)
- case BSTAR(_) => true
- }
- //this function tells bspill when converting a brexp into a list of rexps
- //the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq)
- //or is from the original regex but have been "touched" (i.e. have been derived)
- //Why make this distinction? Look at the following example:
- //r = (c+cb)+c(a+b)
- // r\c = (1+b)+(a+b)
- //after simplification
- // (r\c)simp= 1+b+a
- //we lost the structure that tells us 1+b should be grouped together and a grouped as itself
- //however in our brexp simplification,
- //(r\c)br_simp = 1+b+(a+b)
- //we do not allow the bracket to be opened as it is from the original expression and have not been touched
- def brder(c: Char, r: BRexp) : BRexp = r match {
- case BZERO => BZERO
- case BONE => BZERO
- case BCHAR(d) => if (c == d) BONE else BZERO
- case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD
- case BSEQ(r1, r2) =>
- if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit
- else BSEQ(brder(c, r1), r2)
- case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r))
- }
- def bflat(rs: List[BRexp]): List[BRexp] = {
- rs match {
- case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
- case BZERO :: rs1 => bflat(rs1)
- case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2)
- case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2)
- case r1 :: rs2 => r1 :: bflat(rs2)
- }
- }
- def stflat(rs: List[BRexp]): List[BRexp] = {
- //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
- //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil May add early termination later.
- rs match {
- case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
- case BZERO :: rs1 => bflat(rs1)
- case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2)
- case r1 :: rs2 => r1 :: bflat(rs2)
- }
- }
- def berase(r:BRexp): Rexp = r match{
- case BZERO => ZERO
- case BONE => ONE
- case BCHAR(f) => CHAR(f)
- case BALTS(bs, rs) => ALTS(rs.map(berase(_)))
- case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2))
- case BSTAR(r)=> STAR(berase(r))
- }
- def br_simp(r: BRexp): BRexp = r match {
- case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match {
- case (BZERO, _) => BZERO
- case (_, BZERO) => BZERO
- case (BONE, r2s) => r2s
- case (r1s, r2s) => BSEQ(r1s, r2s)
- }
- case BALTS(bs, rs) => {
- //assert(bs.length == rs.length - 1)
- val dist_res = if(bs == S) {//all S
- val rs_simp = rs.map(br_simp)
- val flat_res = bflat(rs_simp)
- distinctBy(flat_res, berase)
- }
- else{//not allowed to simplify (all Z)
- rs
- }
- dist_res match {
- case Nil => {BZERO}
- case s :: Nil => { s}
- case rs => {BALTS(bs, rs)}
- }
- }
- //case BSTAR(r) => BSTAR(r)
- case r => r
- }
-
- def strong_br_simp(r: BRexp): BRexp = r match {
- case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match {
- case (BZERO, _) => BZERO
- case (_, BZERO) => BZERO
- case (BONE, r2s) => r2s
- case (r1s, r2s) => BSEQ(r1s, r2s)
- }
- case BALTS(bs, rs) => {
- //assert(bs.length == rs.length - 1)
- val dist_res = {//all S
- val rs_simp = rs.map(strong_br_simp)
- val flat_res = stflat(rs_simp)
- distinctBy(flat_res, berase)
- }
- dist_res match {
- case Nil => {BZERO}
- case s :: Nil => { s}
- case rs => {BALTS(bs, rs)}
- }
- }
- case BSTAR(r) => BSTAR(strong_br_simp(r))
- case r => r
- }
- //we want to bound the size by a function bspill s.t.
- //bspill(ders_simp(r,s)) ⊂ PD(r)
- //and bspill is size-preserving (up to a constant factor)
- //so we bound |ders_simp(r,s)| by |PD(r)|
- //where we already have a nice bound for |PD(r)|: t^2*n^2 in Antimirov's paper
-
- //the function bspill converts a brexp into a list of rexps
- //the conversion mainly about this: (r1+r2)*r3*r4*.....rn -> {r1*r3*r4*....rn, r2*r3*r4*.....rn}
- //but this is not always allowed
- //sometimes, we just leave the expression as it is:
- //eg1: (a+b)*c -> {(a+b)*c}
- //eg2: r1+r2 -> {r1+r2} instead of {r1, r2}
- //why?
- //if we always return {a, b} when we encounter a+b, the property
- //bspill(ders_simp(r,s)) ⊂ PD(r)
- //does not always hold
- //for instance
- //if we call the bspill that always returns {r1, r2} when encountering r1+r2 "bspilli"
- //then bspilli( ders_simp( (c+cb)+c(a+b), c ) ) == bspilli(1+b+a) = {1, b, a}
- //However the letter a does not belong to PD( (c+cb)+c(a+b) )
- //then we can no longer say ders_simp(r,s)'s size is bounded by PD(r) because the former contains something the latter does not have
- //In order to make sure the inclusion holds, we have to find out why new terms appeared in the bspilli set that don't exist in the PD(r) set
- //Why a does not belong to PD((c+cb)+c(a+b))?
- //PD(r1+r2) = PD(r1) union PD(r2) => PD((c+cb)+c(a+b)) = PD(c+cb) union PD(c(a+b))
- //So the only possibility for PD to include a must be in the latter part of the regex, namely, c(a+b)
- //we have lemma that PD(r) = union of pders(s, r) where s is taken over all strings whose length does not exceed depth(r)
- //so PD(r) ⊂ pder(empty_string, r) union pder(c, r) union pder(ca, r) union pder(cb, r) where r = c(a+b)
- //RHS = {1} union pder(c, c(a+b))
- //Observe that pder(c, c(a+b)) == {a+b}
- //a and b are together, from the original regular expression (c+cb)+c(a+b).
- //but by our simplification, we first flattened this a+b into the same level with 1+b, then
- //removed duplicates of b, thereby destroying the structure in a+b and making this term a, instead of a+b
- //But in PD(r) we only have a+b, no a
- //one ad hoc solution might be to try this bspill(ders_simp(r,s)) ⊂ PD(r) union {all letters}
- //But this does not hold either according to experiment.
- //So we need to make sure the structure r1+r2 is not simplified away if it is from the original expression
-
-
-
- def bspill(r: BRexp): List[Rexp] = {
- r match {
- case BALTS(b, rs) => {
- if(b == S)
- rs.flatMap(bspill)
- else
- List(ALTS(rs.map(berase)))
- }
- case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) )
- case BZERO => List()
- case r => List(berase(r))
- }
-
- }
-
-}
\ No newline at end of file
--- a/Partial.scala Thu May 07 11:36:15 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-import RexpRelated._
-import RexpRelated.Rexp._
-import Spiral._
-object Partial{
- def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
- case ZERO => Set()
- case ONE => rs
- case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
- }
- def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
- case ZERO => Set()
- case ONE => l
- case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
- }
- def lf(r: Rexp): Lin = r match {
- case ZERO => Set()
- case ONE => Set()
- case CHAR(f) => {
- //val Some(c) = alphabet.find(f)
- Set((f, ONE))
- }
- case ALTS(rs) => {
- rs.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
- }
- case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
- case SEQ(r1, r2) =>{
- if (nullable(r1))
- cir_prod(lf(r1), r2) ++ lf(r2)
- else
- cir_prod(lf(r1), r2)
- }
- }
- def lfs(r: Set[Rexp]): Lin = {
- r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
- }
-
- def pder(x: Char, t: Rexp): Set[Rexp] = {
- val lft = lf(t)
- (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
- }
- def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
- case x::xs => pders(xs, pder(x, t))
- case Nil => Set(t)
- }
- def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
- case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
- case Nil => ts
- }
- def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
- def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
- //all implementation of partial derivatives that involve set union are potentially buggy
- //because they don't include the original regular term before they are pdered.
- //now only pderas is fixed.
- def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
-
- def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
- def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
- def o(r: Rexp) = if (nullable(r)) ONE else ZERO
- def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
- def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
- case ZERO => Set[Rexp]()
- case ONE => Set[Rexp]()
- case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
- case ALTS(rs) => rs.foldLeft(Set[Rexp]())((acc, r) => acc ++ pdp(x, r))
- case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
- case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
- }
- def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
- case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
- case Nil => ts
- }
- def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
-
- //this function currently the problem of counting the same char as different ones
- //because they are defined as "pred"
- def subterms(r: Rexp): Set[Rexp] = {//excluding zeros.
- r match {
- case ZERO => {
- Set[Rexp]()
- }
- case SEQ(r1, r2) => {
- Set(r1, r2) ++ subterms(r1) ++ subterms(r2)
- }
- case ALTS(rs) => {
- rs.toSet ++ rs.foldLeft(Set[Rexp]())((acc, r) => acc ++ subterms(r))
- }
- case STAR(r) => {
- Set(r) ++ subterms(r)
- }
- case r => Set(r)
- }
- }
- def comp(s: List[Char], t: Rexp) = {
- var r = internalise(t)
- var setr = Set(t)
-
- for(i <- 0 to s.length - 1){
- val mamaipi = bsimp(bder(s(i), r))
- val mamaimapi = pdps(List(s(i)), setr)
- //compare dersimp and pder w.r.t each character in string s
- println("current string: " + s.slice(0,i + 1))
- println("der+simp: ")
- println(aregx_tree(mamaipi))
- println("pder: ")
- mamaimapi.foreach(m => println(regx_tree(m)))
- r = mamaipi
- setr = mamaimapi
- }
- for(i <- 1 to 10)
- println(pderas(Set(t), i).size, i)
- val alphabet_star_t = pderas(Set(t), 10)
- println("all possible elements in pder (probably...): ")
- alphabet_star_t.foreach(r => println(regx_tree(r)))
- }
-}
-/* val delta = lfs(t).map(mon => mon._2)
- if(delta.size == t.size)
- {
- println(delta.size)
- println("steady: "+delta.size)
- return delta
- }
-
- else
- {
- val a = pderas(delta)
- println("increment: "+delta.size+a.size)
- return a
- }*/
--- a/Spiral.scala Thu May 07 11:36:15 2020 +0100
+++ b/Spiral.scala Wed May 27 22:23:52 2020 +0100
@@ -1,244 +1,19 @@
+
+import Spiral._
import Element.elem
import RexpRelated._
import RexpRelated.Rexp._
-import Partial._
+//import Partial._
import scala.collection.mutable.ListBuffer
+
object Spiral{
- val space = elem(" ")
- val corner = elem("+")
- def spiral(nEdges: Int, direction: Int): Element = {
- if(nEdges == 1)
- elem("+")
- else {
- val sp = spiral(nEdges - 1, (direction + 3) % 4)
- def verticalBar = elem('|', 1, sp.height)
- def horizontalBar = elem('-', sp.width, 1)
- if(direction == 0)
- (corner beside horizontalBar) above sp//(sp beside space)
- else if (direction == 1)
- sp beside (corner above verticalBar)
- else if (direction == 2)
- (space beside sp) above (horizontalBar beside corner)
- else
- (verticalBar above corner) beside (space above sp)
- }
- }
val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
- def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
- def annotated_tree(r: ARexp): Element = {
- r match {
- case ACHAR(bs, d) => {
- //val Some(d) = alphabet.find(f)
- d match {
- case '\n' => elem("\\n")
- case '\t' => elem("\\t")
- case ' ' => elem("space")
- case d => if(bs.isEmpty) elem(d.toString) else elem(d.toString++" ") beside elem(bs.toString)
- }
- }
- case AONE(bs) => {
- if(bs.isEmpty) elem("ONE") else elem("ONE ") beside elem(bs.toString)
- }
- case AZERO => {
- elem("ZERO")
- }
- case ASEQ(bs, r1, r2) => {
- annotated_binary_print("SEQ", r1, r2, bs)
- }
- case AALTS(bs, rs) => {
- //elem("Awaiting completion")
- annotated_list_print("ALT", rs, bs)
- }
- case ASTAR(bs, r) => {
- annotated_list_print("STA", List(r), bs)
- }
- }
- }
- def aregx_tree(r: ARexp): Element = {
- r match {
- case ACHAR(bs, d) => {
- //val Some(d) = alphabet.find(f)
- d match {
- case '\n' => elem("\\n")
- case '\t' => elem("\\t")
- case ' ' => elem("space")
- case d => elem(d.toString)
- }
- }
- case AONE(bs) => {
- elem("ONE")
- }
- case AZERO => {
- elem("ZERO")
- }
- case ASEQ(bs, r1, r2) => {
- binary_print("SEQ", r1, r2)
- }
- case AALTS(bs, rs) => {
- //elem("Awaiting completion")
- list_print("ALT", rs)
- }
- case ASTAR(bs, r) => {
- list_print("STA", List(r))
- }
- }
- }
- val port = elem(" └-")
- def list_print(name: String, rs: List[ARexp]): Element = {
- rs match {
- case r::Nil => {
- val pref = aregx_tree(r)
- val head = elem(name)
- (head left_align (port up_align pref) )
- }
- case r2::r1::Nil => {
- binary_print(name, r2, r1)
- }
- case r::rs1 => {
- val pref = aregx_tree(r)
- val head = elem(name)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- (head left_align ((port above link) beside pref)) left_align tail_print(rs1)
- }
- else{
- (head left_align (port beside pref) ) left_align tail_print(rs1)
- }
- }
- }
- }
- def annotated_list_print(name: String, rs: List[ARexp], bs: List[Bit]): Element = {
- rs match {
- case r::Nil => {
- val pref = annotated_tree(r)
- val head = if(bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString)
- (head left_align (port up_align pref) )
- }
- case r2::r1::Nil => {
- annotated_binary_print(name, r2, r1, bs)
- }
- case r::rs1 => {
- val pref = annotated_tree(r)
- val head = if (bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- (head left_align ((port above link) beside pref)) left_align annotated_tail_print(rs1)
- }
- else{
- (head left_align (port beside pref) ) left_align annotated_tail_print(rs1)
- }
- }
- }
- }
-
- def annotated_tail_print(rs: List[ARexp]): Element = {
- rs match {
- case r2::r1::Nil => {
- val pref = annotated_tree(r2)
- val suff = annotated_tree(r1)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- ((port above link) beside pref) left_align (port up_align suff)
- }
- else{
- (port beside pref) left_align (port up_align suff)
- }
- }
- case r2::rs1 => {
- val pref = annotated_tree(r2)
-
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- ((port above link) beside pref) left_align annotated_tail_print(rs1)//(port up_align tail_print(rs1) )
- }
- else{
- (port beside pref) left_align annotated_tail_print(rs1)//(port up_align tail_print(rs1))
- }
- //pref left_align tail_print(rs1)
- }
- }
- }
-
- def tail_print(rs: List[ARexp]): Element = {
- rs match {
- case r2::r1::Nil => {
- val pref = aregx_tree(r2)
- val suff = aregx_tree(r1)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- ((port above link) beside pref) left_align (port up_align suff)
- }
- else{
- (port beside pref) left_align (port up_align suff)
- }
- }
- case r2::rs1 => {
- val pref = aregx_tree(r2)
-
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- ((port above link) beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1) )
- }
- else{
- (port beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1))
- }
- //pref left_align tail_print(rs1)
- }
- }
- }
-
- def binary_print(name: String, r1: ARexp, r2: ARexp): Element = {
- val pref = aregx_tree(r1)
- val suff = aregx_tree(r2)
- val head = elem(name)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- (head left_align ((port above link) beside pref) ) left_align (port up_align suff)
- }
- else{
- (head left_align (port beside pref) ) left_align (port up_align suff)
- }
- }
- def annotated_binary_print(name: String, r1: ARexp, r2: ARexp, bs: List[Bit]): Element = {
- val pref = annotated_tree(r1)
- val suff = annotated_tree(r2)
- val head = if (bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString)
- if (pref.height > 1){
- val link = elem('|', 1, pref.height - 1)
- (head left_align ((port above link) beside pref) ) left_align (port up_align suff)
- }
- else{
- (head left_align (port beside pref) ) left_align (port up_align suff)
- }
- }
-
+
val arr_of_size = ListBuffer.empty[Int]
- def pC(r: Rexp): Set[Rexp] = {//PD's companion
- r match {
- case SEQ(r1, r2) => pC(r2)
- case ALTS(rs) => rs.flatMap(a => pC(a) ).toSet
- case CHAR(c) => Set(r)
- case r => Set()
- }
- }
- def illustration(r: Rexp, s: String){
- var i_like_imperative_style = internalise(r)
- val all_chars = s.toList
- for (i <- 0 to s.length - 1){
- val der_res = bder(all_chars(i), i_like_imperative_style)
- val simp_res = bsimp(der_res)
- println("The original regex, the regex after derivative w.r.t " + all_chars(i) + " and the simplification of the derivative.")
- println(aregx_tree(i_like_imperative_style) up_align aregx_tree(der_res) up_align aregx_tree(simp_res))
- //println(asize(i_like_imperative_style), asize(der_res), asize(simp_res))
- arr_of_size += asize(i_like_imperative_style)
- //println(asize(simp_res), asize(simp_res) / arr_of_size(0) )
- i_like_imperative_style = simp_res
- }
- arr_of_size += asize(i_like_imperative_style)
- }
+
val ran = scala.util.Random
var alphabet_size = 3
def balanced_seq_star_gen(depth: Int, star: Boolean): Rexp = {
@@ -252,20 +27,15 @@
SEQ(balanced_seq_star_gen(depth - 1, true), balanced_seq_star_gen(depth - 1, true))
}
}
- def max(i: Int, j: Int): Int = {
- if(i > j)
- i
- else
- j
- }
def random_struct_gen(depth:Int): Rexp = {
val dice = ran.nextInt(3)
val dice2 = ran.nextInt(3)
- (dice, depth) match {
+ val new_depth = (List(0, depth - 1 - dice2)).max
+ (dice, new_depth) match {
case (_, 0) => ((ran.nextInt(3) + 97).toChar).toString
- case (0, i) => STAR(random_struct_gen(max(0, i - 1 - dice2)))
- case (1, i) => SEQ(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2)))
- case (2, i) => ALTS( List(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2))) )
+ case (0, i) => STAR(random_struct_gen(new_depth))
+ case (1, i) => SEQ(random_struct_gen(new_depth), random_struct_gen(new_depth))
+ case (2, i) => ALTS( List(random_struct_gen(new_depth), random_struct_gen(new_depth)) )
}
}
def balanced_struct_gen(depth: Int): Rexp = {
@@ -297,41 +67,8 @@
println(b(0),b.max)
}
- def dep_exp(depth: List[Int]){
- for(i <- depth){
- arr_of_size.clear()
- val s = rd_string_gen(alphabet_size, (i-8)*(i-8)+10)
- val r = random_struct_gen(i)
- println("depth: "+i)
- illustration(r, s) //"abcabadaaadcabdbabcdaadbabbcbbdabdabbcbdbabdbcdb")
- //println("depth: " + i + " general stats:"+ arr_of_size(0), arr_of_size.max, arr_of_size.max/arr_of_size(0))
- //println("x y label alignment")
- /*for(i <- 0 to s.length - 1){
- if(s(i) == '\n')
- println(i+" "+arr_of_size(i)+" "+"nl"+" -140")
- else if(s(i) == ' ')
- println(i+" "+arr_of_size(i)+" "+"sp"+" -140")
- else
- println(i+" "+arr_of_size(i)+" "+s(i)+" -140")
- }*/
- //println(s.length + " " + arr_of_size(s.length) + " ]" + " -140")
- }
- }
- def case_study(ss: List[String], r: Rexp){
- for(s <- ss){
- arr_of_size.clear()
- illustration(r, s)
- println("x y label alignment")
- for(i <- 0 to s.length - 1){
- if(s(i) == '\n')
- println(i+" "+arr_of_size(i)+" "+"nl"+" -140")
- else if(s(i) == ' ')
- println(i+" "+arr_of_size(i)+" "+"sp"+" -140")
- else
- println(i+" "+arr_of_size(i)+" "+s(i)+" -140")
- }
- }
- }
+
+
def star_gen(dp: Int): Rexp = {
if(dp > 0)
STAR(star_gen(dp - 1))
@@ -346,48 +83,9 @@
Nil
}
}
- def regx_print(r: Rexp): String = {
- r match {
- case ZERO =>
- "ZERO"
- case CHAR(c) => {
- //val Some(c) = alphabet.find(f)
- "\"" + c.toString + "\""
- }
- case ONE => {
- "ONE"
- }
- case ALTS(rs) => {
- "ALTS(List("+(rs.map(regx_print)).foldLeft("")((a, b) => if(a == "") b else a + "," + b)+"))"
- }
- case SEQ(r1, r2) => {
- "SEQ(" + regx_print(r1) + "," + regx_print(r2) + ")"
- }
- case STAR(r) => {
- "STAR(" + regx_print(r) + ")"
- }
- }
- }
+
val mkst = "abcdefghijklmnopqrstuvwxyz"
-
- def inclusion_truth(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = {
- val aset = anatomy.toSet
- if(aset subsetOf pd){
- true
- }
- else{
- println("inclusion not true")
- false
- }
- }
- def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
- def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true}
- def ders_simp(r: ARexp, s: List[Char]): ARexp = {
- s match {
- case Nil => r
- case c::cs => ders_simp(bsimp(bder(c, r)), cs)
- }
- }
+
val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c')))))))
val str_panda = "ccccb"
@@ -396,28 +94,28 @@
case b::bs1 => elem(b.toString) beside bstostick(bs1)
case Nil => elem(' ', 1, 1)
}
- def bits_print(r: ARexp): Element = r match {
+ def aregex_simple_print(r: ARexp): Element = r match {
case AALTS(bs,rs) => {
val bitstick = bstostick(bs)
rs match {
case r::rs1 =>
rs1.foldLeft(
((elem("(") left_align bitstick) beside
- bits_print(r)) )( (acc, r2) => acc beside ((elem(" + ") above elem(" ")) beside bits_print(r2) )) beside
+ aregex_simple_print(r)) )( (acc, r2) => acc beside ((elem(" + ") above elem(" ")) beside aregex_simple_print(r2) )) beside
elem(")")
case Nil => elem(' ', 1, 1)
}
}
case ASEQ(bs, r1, r2) => {
- ((elem("[") left_align bstostick(bs)) beside bits_print(r1) beside elem("~") beside bits_print(r2) beside (elem("]") above elem(" ")))
+ ((elem("[") left_align bstostick(bs)) beside aregex_simple_print(r1) beside elem("~") beside aregex_simple_print(r2) beside (elem("]") above elem(" ")))
}
case ASTAR(bs, r) => {
r match {
case AONE(_) | AZERO | ACHAR(_, _) => {
- (elem("{") left_align bstostick(bs)) beside (bits_print(r) beside elem("}*"))
+ (elem("{") left_align bstostick(bs)) beside (aregex_simple_print(r) beside elem("}*"))
}
case _ => {
- (elem("{") left_align bstostick(bs)) beside (bits_print(r) beside elem("}*"))
+ (elem("{") left_align bstostick(bs)) beside (aregex_simple_print(r) beside elem("}*"))
}
}
}
@@ -432,30 +130,30 @@
elem ("0") above elem(" ")
}
}
- def happy_print(r: Rexp): Unit = r match {
+ def regex_simple_print(r: Rexp): Unit = r match {
case ALTS( r::rs ) => {
print("(")
- happy_print(r)
+ regex_simple_print(r)
rs.map(r2=>{
print(" + ")
- happy_print(r2)
+ regex_simple_print(r2)
})
print(")")
}
case SEQ(r1, r2) => {
- happy_print(r1)
+ regex_simple_print(r1)
print("~")
- happy_print(r2)
+ regex_simple_print(r2)
}
case STAR(r) => {
r match {
case ONE | ZERO | CHAR(_) => {
- happy_print(r)
+ regex_simple_print(r)
print("*")
}
case _ => {
print("[")
- happy_print(r)
+ regex_simple_print(r)
print("]*")
}
}
@@ -470,42 +168,30 @@
print(c)
}
}
- def bits_slide(s: String, r: Rexp){
- val nangao = ders_simp(internalise(r), s.toList)
- val easy = bsimp(bders(s.toList, internalise(r)))
- println(s)
- println(r)
- happy_print(r)
- println()
- print(bits_print(nangao))
- println()
- print(bits_print(easy))
- println()
- }
//found r = SEQ(ALTS(List(CHAR(c), CHAR(a))),SEQ(ALTS(List(CHAR(a), CHAR(c))),ALTS(List(CHAR(b), CHAR(c))))) s = "aa"
def bsimp_print(r: ARexp): Unit = r match {
case ASEQ(bs, r1, r2) =>
{
println("0.r or r.0 to 0 or bs1 1.r to fuse bs++bs1 r")
- bits_print(bsimp(r1))
- bits_print(bsimp(r2))
+ aregex_simple_print(bsimp(r1))
+ aregex_simple_print(bsimp(r2))
}
case AALTS(bs, rs) => {
println("rs.map(bsimp) equals *************")
val rs_simp = rs.map(bsimp)
for(r <- rs_simp){
- println(bits_print(r))
+ println(aregex_simple_print(r))
}
println("*************")
println("flts(rs_simp)")
val flat_res = flats(rs_simp)
for(r <- flat_res){
- println(bits_print(r))
+ println(aregex_simple_print(r))
}
println("dB(flat_res)")
val dist_res = distinctBy(flat_res, erase)
for(r <- dist_res){
- println(bits_print(r))
+ println(aregex_simple_print(r))
}
dist_res match {
case Nil => println("AZERO")
@@ -515,99 +201,16 @@
}
case _ => println("No simp at this level")
}
- def tellmewhy(){
- //val r = SEQ(ALTS(List(CHAR('a'), CHAR('b'))),ALTS(List(ALTS(List(CHAR('a'), CHAR('a'))), STAR(CHAR('a')))))
- //val r = SEQ(ALTS(List(CHAR('a'), CHAR('b'))),ALTS(List(CHAR('a'), STAR(CHAR('a')) ) ))
- val r = ("ab" | ( (("a")%) | "aa") )
- //val r = ("a"|"b")~("a")
- val s = "aa"
- for(i <- 0 to s.length-1){
- val ss = s.slice(0, i+1)
- val nangao = bders_simp_rf(ss.toList, internalise(r))
- val easy = (bders(ss.toList, internalise(r)))
- println("iteration starts")
- println("bders_simp")
- println(bits_print(nangao))
- println()
- println("bders")
- println(bits_print(easy))
- println()
- println("new simp")
- println(bits_print(bsimp_rf(easy)))
- println("iteration ends")
- }
- println("old simp ders")
- println(bits_print(bsimp(bders(s.toList, internalise(r)))))
- println("old derssimp")
- println(bits_print(ders_simp(internalise(r), s.toList)))
- }
- def find_re(){
- for (i <- 1 to 10000){
- val r = balanced_struct_gen(3)
- val s = rd_string_gen(2,1)
- val nangao = ders_simp(internalise(r), s.toList)
- val easy = bsimp(bders(s.toList, internalise(r)))
- if(nangao != easy){
- bits_slide(s, r)
- }
- }
- }
+
+
//simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set)
def pushbits(r: ARexp): ARexp = r match {
case AALTS(bs, rs) => AALTS(Nil, rs.map(r=>fuse(bs, pushbits(r))))
case ASEQ(bs, r1, r2) => ASEQ(bs, pushbits(r1), pushbits(r2))
case r => r
}
- def correctness_proof_convenient_path(){
- for(i <- 1 to 19999){
- val s = rd_string_gen(alphabet_size, 3)//"abaa"//rd_string_gen(alphabet_size, 3)
- val r = internalise(random_struct_gen(4))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
- for(j <- 0 to s.length - 1){
- val ss = s.slice(0, j+ 1)
- val nangao = bders_simp_rf(ss.toList, r)
- val easy = bsimp_rf(bders_rf(ss.toList, r))
- if(!(nangao == easy)){
- println(j)
- println("not equal")
- println("string")
- println(ss)
- println("original regex")
- println(annotated_tree(r))
- println("regex after ders simp")
- println(annotated_tree(nangao))
- println("regex after ders")
- println(annotated_tree(bders(ss.toList, r)))//flats' fuse when opening up AALTS causes the difference
- println("regex after ders and then a single simp")
- println(annotated_tree(easy))
- }
- }
- }
- }
- /* if(bts == cdbts){//test of equality code v = retrieve internalise(r) v if |- v : r
- println(bts)
- println(cdbts)
- println("code v = retrieve internalise(r) v if |- v : r")
- }
- val r_der_s = bders(st.toList, rg)
- println(aregx_tree(r_der_s))
- val bts = retrieve(r_der_s, unsimplified_vl)
- val cdbts = code(unsimplified_vl)
- val simprg = bsimp(r_der_s)
- val frectv = decode(erase(simprg), cdbts)
- val simpbts = retrieve(simprg, frectv)
- if(bts == simpbts){
- println("retrieve r v = retrieve (bsimp r) (decode bsimp(r) code(v))")
- println("bits:")
- //println(bts)
- println(simpbts)
- println("v = ")
- println(unsimplified_vl)
- println("frect v = ")
- println(frectv)
- }
- *///KH8W5BXL
def nice_lex(r: Rexp, s: List[Char], ar: ARexp) : Val = s match {
case Nil =>
if (nullable(r)){
@@ -688,17 +291,6 @@
println(s"bsimp(bder c r) = ${unsimp}, whereas bsimp(bder c bsimp r) = ${simped}")
}
}
- def essence_posix(){
- //val s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"//rd_string_gen(alphabet_size, 3)//"abaa"//rd_string_gen(alphabet_size, 3)
- val s0 = "a"
- val r = SEQ(STAR(ALT("a", "aa")), "b")//internalise(random_struct_gen(4))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
- for(i <- 1 to 40){
- val s = s0*i
- //printf("%d %d\n",i, size(ders(s.toList, r)))
- printf("%d %d\n",i, asize(ders_simp( internalise(r), s.toList)))
- //println(asize(ders_simp( internalise(r), s.toList)))
- }
- }
def speed_test(){
val s0 = "a"*1000
@@ -825,11 +417,11 @@
val v1 = inj(erase(r), c, v0)
val v2 = fuzzy_inj(r, c, v)
println("regular expression original")
- println(bits_print(r))
+ println(aregex_simple_print(r))
println("regular expression derivative")
- println(bits_print(dr))
+ println(aregex_simple_print(dr))
println("regular expression simped derivative")
- println(bits_print(sdr))
+ println(aregex_simple_print(sdr))
println("simplified value")
println(v)
println("unsimplified value")
@@ -842,36 +434,32 @@
println("fuzzy_inj "+r+c+v)
}
def vunsimp_test(){
- for(i <- 1 to 1000){
- val r = random_struct_gen(5)//[{ a}*~{{a}*}*]
- val c = (ran.nextInt(3) + 97).toChar//Sequ(Stars(List()),Stars(List()))
- val dr = der(c, r)
- val bdr = bder(c, internalise(r))
- if(nullable(dr)){
- println("bohooooooooooooooooo!")
- val dv = mkeps(dr)
- val target_vr = bsimp2(bdr, dv)
- println(bits_print(target_vr._1))
- println(target_vr._2)
- println(vunsimp(bdr, dv)(target_vr._2))
- }
- }
+ /*val bdr = AALTS(
+ List(),
+ List(
+ AONE(List(S,Z)),
+ ASTAR(List(Z,Z,S), ACHAR(List(),'c')),
+ ASEQ(List(Z), ASTAR(List(S), ACHAR(List(), 'c')), ASTAR(List(S), ACHAR(List(), 'c') ) )
+ )
+ )*/
+ val bdr = AALTS(List(),List(AALTS(List(Z),List(ASEQ(List(),
+ ASEQ(List(),AONE(List(S)),ASTAR(List(),ACHAR(List(),'c'))),ASTAR(List(),ACHAR(List(),'c'))),
+ ASEQ(List(Z),AONE(List(S)),ASTAR(List(),ACHAR(List(),'c'))))), AALTS(List(S),List(AONE(List(Z)), AZERO))))
+ val dr = erase(bdr)
+ val dv = mkeps(dr)
+ //val bdr = bder(c, internalise(r))
+ //val dv = mkeps(dr)
+ println(aregex_simple_print(bdr))
+ println(dv)
+ val target_vr = bsimp2(bdr, dv)
+ println(aregex_simple_print(target_vr._1))
+ println(target_vr._2)
+ println(vunsimp(bdr, dv)(target_vr._2))
}
+
+
def main(args: Array[String]) {
- //finj_test0('b', fuse(List(S, Z), internalise( ( ("a"|"b")% ) ) ))
- //finj_test0('a', fuse(List(S, Z), internalise( ( ("a"|"b")% ) ) ))
- //finj_test0('b', bder('a', internalise( (("c" | "ab")%) )))
- //finj_test0('a', internalise( (("c" | "ab")%) ))
- //this example gives us an exception
- //val r = internalise(ALTS(List(STAR(CHAR('c')), STAR(CHAR('b')))))
- //val c = 'c'
- //if(bder(c, r) != internalise(der(c, erase(r))))
- //println("noooooo!")
- //println(bits_print(r))
- //println(c)
- //finj_test0(c, r)
vunsimp_test()
}
}
-//List( ASTAR(List(Z),ACHAR(List(),a)), AALTS(List(S),List(ACHAR(List(Z),b), ACHAR(List(S),a))) )
--- a/lex_blex_Frankensteined.scala Thu May 07 11:36:15 2020 +0100
+++ b/lex_blex_Frankensteined.scala Wed May 27 22:23:52 2020 +0100
@@ -1,9 +1,13 @@
-package RexpRelated
+
+import Element.elem
import scala.language.implicitConversions
import scala.language.reflectiveCalls
import scala.annotation.tailrec
import scala.util.Try
+
+package RexpRelated
+{
abstract class Bit
case object Z extends Bit
case object S extends Bit
@@ -31,6 +35,10 @@
def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
+
+
+
+
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
case Nil => Nil
case (x::xs) => {
@@ -661,6 +669,10 @@
case Sequ(v1small, v2small) => Sequ(vunsimp(r1, v1)(v1small), vunsimp(r2, v2)(v2small))
case _ => {
println(vsmall) ;
+ println(aregex_simple_print(r1))
+ println(v1)
+ println(aregex_simple_print( r2))
+ println(v2)
throw new Exception("bad arguments sequ")
}
//(ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s))
@@ -828,11 +840,11 @@
}
def deunify(rs_length: Int, v: Val): Val = v match{
case Position(i, v0) => {
- if(i <0) {println(rs_length, v); throw new Exception("deunify minus")}
- else if(rs_length == 1) {println(v); v}
- else if(rs_length - 1 == i) coat(v0, i)
- else coat(Left(v0), i)
- }
+ if(i <0) {throw new Exception("deunify minus")}
+ else if(rs_length == 1) { v0}
+ else if(rs_length - 1 == i) {coat(v0, i)}
+ else {coat(Left(v0), i)}
+ }
case _ => throw new Exception("deunify error")
}
def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match {
@@ -856,15 +868,16 @@
}
case _ => throw new Exception("flats2 error")
}
- def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match {
+ def distinctBy2[B, C](j: Int, v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match {
case Nil => (res, v)
case (x::xs) => {
val re = f(x)
if (acc.contains(re)) v match {
- case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res)
+ case Position(i, v0) => if(j > 0) distinctBy2(j - 1, Position(i - 1, v0), xs, f, acc, res)
+ else if(j < 0)distinctBy2(j - 1, Position(i, v0), xs, f, acc, res) else throw new Exception("Value not POSIX")
case _ => throw new Exception("dB2")
}
- else distinctBy2(v, xs, f, re::acc, x::res)
+ else distinctBy2(j - 1, v, xs, f, re::acc, x::res)
}
}
def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
@@ -875,14 +888,21 @@
case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s))
}
case (AALTS(bs1, rs), v) => {
+ println("Entry into AALTS")
+ rs.map(println(aregex_simple_print()))
val vlist = unify(rs, v)
vlist match {
case Position(i, v0) => {
val v_simp = bsimp2(rs(i), v0)._2
+ println("map on bsimp")
val rs_simp = rs.map(bsimp)
val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) )
- val dist_res = distinctBy2(flat_res._2, flat_res._1, erase)
+ println("list after flats2")
+ flat_res._1.map(println(aregex_simple_print(_)))
+ val dist_res = distinctBy2(i, flat_res._2, flat_res._1, erase)
+ println("list after distinctBy2")
val rs_new = dist_res._1
+ rs_new.map(println(aregex_simple_print(_)))
val v_new = deunify(rs_new.length, dist_res._2)
rs_new match {
case Nil => (AZERO, undefined)
@@ -1283,7 +1303,48 @@
//--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
+ def bstostick(bs: List[Bit]): Element = bs match {
+ //case b::Nil => elem(b.toString)
+ case b::bs1 => elem(b.toString) beside bstostick(bs1)
+ case Nil => elem(' ', 1, 1)
+ }
+ def aregex_simple_print(r: ARexp): Element = r match {
+ case AALTS(bs,rs) => {
+ val bitstick = bstostick(bs)
+ rs match {
+ case r::rs1 =>
+ rs1.foldLeft(
+ ((elem("(") left_align bitstick) beside
+ aregex_simple_print(r)) )( (acc, r2) => acc beside ((elem(" + ") above elem(" ")) beside aregex_simple_print(r2) )) beside
+ elem(")")
+ case Nil => elem(' ', 1, 1)
+ }
+ }
+ case ASEQ(bs, r1, r2) => {
+ ((elem("[") left_align bstostick(bs)) beside aregex_simple_print(r1) beside elem("~") beside aregex_simple_print(r2) beside (elem("]") above elem(" ")))
+ }
+ case ASTAR(bs, r) => {
+ r match {
+ case AONE(_) | AZERO | ACHAR(_, _) => {
+ (elem("{") left_align bstostick(bs)) beside (aregex_simple_print(r) beside elem("}*"))
+ }
+ case _ => {
+ (elem("{") left_align bstostick(bs)) beside (aregex_simple_print(r) beside elem("}*"))
+ }
+ }
+ }
+ case AONE(bs) => {
+ elem("1") left_align bstostick(bs)
+ }
+ case ACHAR(bs, c) => {
+ elem(c, 1, 1) left_align bstostick(bs)
+ }
+ case AZERO =>
+ {
+ elem ("0") above elem(" ")
+ }
+ }
def clear() = {
@@ -1345,6 +1406,8 @@
}
+
+
import Rexp.Bits
abstract class ARexp
case object AZERO extends ARexp
@@ -1368,3 +1431,4 @@
case object undefined extends Val
//case class Pos(i: Int, v: Val) extends Val
case object Prd extends Val
+}