lexing: comparison thys2/blexer2.sc

equal deleted inserted replaced

-:d8740017324c
+:cb702fb4227f
 // else
 L(SEQ(acc, r), rs1)
 }
 // Prints whatever shortRexpOutput produces for r, followed by a newline.
 def rprint(r: Rexp) : Unit = {
   val rendered = shortRexpOutput(r)
   println(rendered)
 }
-def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
+// Prints shortRexpOutput of every element of rs, one per line, in iteration order.
+def rsprint(rs: Iterable[Rexp]) = {
+  for (r <- rs) println(shortRexpOutput(r))
+}
 // Erases the annotated regular expression a and prints shortRexpOutput of the result.
 def aprint(a: ARexp) = {
   val plain = erase(a)
   println(shortRexpOutput(plain))
 }
 // Prints every element of as, erased and passed through shortRexpOutput, one per line in list order.
 def asprint(as: List[ARexp]) = {
   for (a <- as) {
     val plain = erase(a)
     println(shortRexpOutput(plain))
   }
 }
 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
 // println("pakc")
 }
 }
 }
 }
-def attachCtx(r: ARexp, ctx: List[Rexp]) : List[Rexp] = {
+def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
 // Flattens r into a list of terms:
 //  - SEQ(r1, r2): every term of r1 is re-sequenced with r2; when r1 is nullable,
 //    the terms of r2 are appended after them as well.
 //  - ALTS(r1, r2): the terms of r1 followed by the terms of r2.
 //  - ZERO: contributes no terms.
 //  - anything else: kept as a single-element list.
-val res = breakIntoTerms(oneSimp(L(erase(r), ctx))).map(oneSimp)
+case SEQ(r1, r2)  => if(nullable(r1))
+strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) :::
+strongBreakIntoTerms(r2)
+else
+strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+case ALTS(r1, r2) => strongBreakIntoTerms(r1) ::: strongBreakIntoTerms(r2)
+case ZERO => Nil
+case _ => r :: Nil
+}
+// Builds L(erase(r), ctx), splits it with strongBreakIntoTerms, applies oneSimp to every
+// resulting term and returns the terms as a set (so duplicate terms collapse).
+def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
+  val withContext = L(erase(r), ctx)
+  strongBreakIntoTerms(withContext).map(oneSimp).toSet
+}
+// Combines a and b with f, then asks inclusionPred whether that combination is included in c.
+// The combined value is always the first argument of inclusionPred and c the second.
+def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
+  val combined: C = f(a, b)
+  inclusionPred(combined, c)
+}
+def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
 // Returns true exactly when every element of rs1 is also an element of rs2
 // (the same test as rs1.subsetOf(rs2)).
 // The commented-out println/rsprint lines are debug tracing of both sets and of the result.
+// println("r+ctx---------")
+// rsprint(rs1)
+// println("acc---------")
+// rsprint(rs2)
+val res = rs1.forall(rs2.contains)
+// println(res)
+// println("end------------------")
 res
 }
+def pAKC6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
-def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
+// println("pakc--------r")
-inclusionPred(f(a, b), c)
-}
-def rexpListInclusion(rs1: List[Rexp], rs2: List[Rexp]) : Boolean = {
-rs1.forall(rs2.contains)
-}
-def pAKC6(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
-// println("pakc")
 // println(shortRexpOutput(erase(r)))
-// println("acc")
+//   println("ctx---------")
+// rsprint(ctx)
+// println("pakc-------acc")
 // rsprint(acc)
-// println("ctx---------")
+// println("r+ctx broken down---------")
-// rsprint(ctx)
-// println("ctx---------end")
 // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
 // rprint(L(erase(r), ctx))
 //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
 if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
 }
 }
 }
-def distinctBy6(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) : List[ARexp] = xs match {
 // Walks xs from left to right and keeps only the elements that are not already covered by acc.
 //  - acc is the set of terms seen so far (plain Rexp); it is empty by default.
 //  - An element whose erased form is already in acc is dropped.
 //  - Otherwise the element is pruned by pAKC6 against acc with an empty context list.
 //    A pruned result of AZERO is dropped; any other result is kept, and the terms that
 //    strongBreakIntoTerms yields for its erased form are added to acc after oneSimp.
 case Nil =>
 Nil
 case x :: xs => {
 val erased = erase(x)
 if(acc.contains(erased)){
 distinctBy6(xs, acc)
 }
 else{
 val pruned = pAKC6(acc, x, Nil)
 // NOTE(review): newTerms is computed before the AZERO check below and is unused in that branch.
-val newTerms = breakIntoTerms(erase(pruned))
+val newTerms = strongBreakIntoTerms(erase(pruned))
 pruned match {
 case AZERO =>
 distinctBy6(xs, acc)
 case xPrime =>
 // NOTE(review): on Scala 2.13, `++:` on a Set is deprecated in favour of `++` — confirm the Scala version in use.
-xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
 }
 }
 }
 }
 case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )
 }
 // Combines every element of the linear form l with the regular expression t on the right.
 //  - t is ZERO: the result is the empty set.
 //  - t is ONE: l is returned unchanged.
 //  - otherwise each pair m in l is rewritten according to its second component:
 //    ZERO leaves m as it is, ONE becomes (m._1, t), and any other p becomes (m._1, SEQ(p, t)).
 // NOTE(review): Lin is declared outside this view; from its use here it is presumably a Set of
 // pairs whose second component is a Rexp — confirm against its definition.
 def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
 case ZERO => Set()
 case ONE => l
-case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
+case t => l.map( m => m._2 match
+{
+case ZERO => m
+case ONE => (m._1, t)
+case p => (m._1, SEQ(p, t))
+}
+)
 }
 def lf(r: Rexp): Lin = r match {
 case ZERO => Set()
 case ONE => Set()
 case CHAR(f) => {
 }
 // Consumes the characters of s from left to right. For each character the current set is
 // replaced by the union of pder(c, t) over all of its elements t; once s is exhausted the
 // current set is returned (so an empty s returns ts unchanged).
 def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] =
   s.foldLeft(ts) { (current, c) =>
     current.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(c, t))
   }
-def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+// Union, over every string s in ss, of pders_single(s, t); an empty ss gives the empty set.
+def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = {
+  val nothingYet = Set[Rexp]()
+  ss.foldLeft(nothingYet) { (collected, s) => pders_single(s, t) ++ collected }
+}
 // Second components of all pairs in lf(t).
 def pdera(t: Rexp): Set[Rexp] =
   for (mon <- lf(t)) yield mon._2
 //All implementations of partial derivatives that involve set union are potentially buggy,
 //because they do not include the original regular expression before its partial derivatives are taken.
 //So far only pderas has been fixed.
-def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+// Repeatedly takes the second components of lfs over the newest set, keeping the input set
+// at every level: with d > 0 it recurses on the successors with d - 1 and adds t to the result;
+// otherwise it returns the successors together with t.
+def pderas(t: Set[Rexp], d: Int): Set[Rexp] = {
+  val successors: Set[Rexp] = lfs(t).map(mon => mon._2)
+  if (d > 0) pderas(successors, d - 1) ++ t
+  else successors ++ t
+}
 // Runs pderas on the singleton set containing r, with depth awidth(r) + 1.
 def pderUNIV(r: Rexp) : Set[Rexp] = {
   val depth = awidth(r) + 1
   pderas(Set(r), depth)
 }
 def awidth(r: Rexp) : Int = r match {
 case CHAR(c) => 1
 case SEQ(r1, r2) => awidth(r1) + awidth(r2)
 case ALTS(r1, r2) => awidth(r1) + awidth(r2)
 }
 }
 // small()
-generator_test()
+// generator_test()
 // Debugging aid: for one hard-coded regular expression r and input string s, prints r, the terms
 // obtained from the bdersStrong6 result, and the set returned by pderUNIV(r), so that the two
 // collections can be compared by eye. Nothing is returned or asserted.
 // NOTE(review): procedure syntax (`def f() { ... }`) is deprecated since Scala 2.13 and dropped
 // in Scala 3; `(): Unit = {` is the portable spelling.
 def counterexample_check() {
-val r = STAR(SEQ(ALTS(ALTS(CHAR('b'),CHAR('c')),
+val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
-SEQ(CHAR('b'),CHAR('b'))),ALTS(SEQ(ONE,CHAR('b')),CHAR('a'))))
+val s = "ccc"
-val s = "bbbb"
 // The local names say "Strong5", but the value is computed by bdersStrong6.
 val bdStrong5 = bdersStrong6(s.toList, internalise(r))
 val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
 val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
 println("original regex ")
 rprint(r)
 // No heading is printed before this list; it follows the regex line directly.
 rsprint(bdStrong5Set)
 println("after pderUNIV")
 rsprint(pdersSet.toList)
 }
 // counterexample_check()
+// Smoke test for lf: builds STAR(SEQ(STAR(CHAR('c')), ONE)), computes lf of it and prints
+// the number of elements in the result. Nothing is returned or asserted.
+// Declared with an explicit `: Unit =` rather than procedure syntax (`def f() { ... }`),
+// which is deprecated since Scala 2.13 and dropped in Scala 3.
+def linform_test(): Unit = {
+  val r = STAR(SEQ(STAR(CHAR('c')), ONE))
+  val r_linforms = lf(r)
+  println(r_linforms.size)
+}
+linform_test()
 // 1
 def newStrong_test() {
 val r2 = (CHAR('b') | ONE)
 val r0 = CHAR('d')
 val r1 = (ONE | CHAR('c'))

changeset 526	cb702fb4227f
parent 518	ff7945a988a3
child 530	823d9b19d21c