1 // eNFA that does not accept any string  | 
     1 // Thompson Construction  | 
     2 def eNFA_ZERO(): NFA[TState, Char] = { | 
     2 // (needs  :load nfa.scala  | 
         | 
     3 //         :load enfa.scala)  | 
         | 
     4   | 
         | 
     5   | 
         | 
     6 // states for Thompson construction  | 
         | 
     7 case class TState(i: Int) extends State  | 
         | 
     8   | 
         | 
     9 object TState { | 
         | 
    10   var counter = 0  | 
         | 
    11     | 
         | 
    12   def apply() : TState = { | 
         | 
    13     counter += 1;  | 
         | 
    14     new TState(counter - 1)  | 
         | 
    15   }  | 
         | 
    16 }  | 
         | 
    17   | 
         | 
    18   | 
         | 
    19 // some types abbreviations  | 
         | 
    20 type NFAt = NFA[TState, Char]  | 
         | 
    21 type NFAtrans = (TState, Char) :=> Set[TState]  | 
         | 
    22 type eNFAtrans = (TState, Option[Char]) :=> Set[TState]  | 
         | 
    23   | 
         | 
    24   | 
         | 
    25 // for composing an eNFA transition with a NFA transition  | 
         | 
    26 implicit class RichPF(val f: eNFAtrans) extends AnyVal { | 
         | 
    27   def +++(g: NFAtrans) : eNFAtrans =   | 
         | 
    28   { case (q, None) =>  applyOrElse(f, (q, None))  | 
         | 
    29     case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c))  }  | 
         | 
    30 }  | 
         | 
    31   | 
         | 
    32   | 
         | 
    33 // NFA that does not accept any string  | 
         | 
    34 def NFA_ZERO(): NFAt = { | 
     3   val Q = TState()  | 
    35   val Q = TState()  | 
     4   NFA(Set(Q), { case _ => Set() }, Set()) | 
    36   NFA(Set(Q), { case _ => Set() }, Set()) | 
     5 }  | 
    37 }  | 
     6   | 
    38   | 
     7 // eNFA that accepts the empty string  | 
    39 // NFA that accepts the empty string  | 
     8 def eNFA_ONE() : NFA[TState, Char] = { | 
    40 def NFA_ONE() : NFAt = { | 
     9   val Q = TState()  | 
    41   val Q = TState()  | 
    10   NFA(Set(Q), { case _ => Set() }, Set(Q)) | 
    42   NFA(Set(Q), { case _ => Set() }, Set(Q)) | 
    11 }  | 
    43 }  | 
    12   | 
    44   | 
    13 // eNFA that accepts the string "c"  | 
    45 // NFA that accepts the string "c"  | 
    14 def eNFA_CHAR(c: Char) : NFA[TState, Char] = { | 
    46 def NFA_CHAR(c: Char) : NFAt = { | 
    15   val Q1 = TState()  | 
    47   val Q1 = TState()  | 
    16   val Q2 = TState()  | 
    48   val Q2 = TState()  | 
    17   NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) | 
    49   NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) | 
    18 }  | 
    50 }  | 
    19   | 
    51   | 
    20 // alternative of two eNFAs  | 
    52 // sequence of two NFAs  | 
    21 def eNFA_ALT(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = { | 
    53 def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { | 
         | 
    54   val new_delta : eNFAtrans =   | 
         | 
    55     { case (q, None) if enfa1.fins(q) => enfa2.starts } | 
         | 
    56     | 
         | 
    57   eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta,   | 
         | 
    58        enfa2.fins)  | 
         | 
    59 }  | 
         | 
    60   | 
         | 
    61 // alternative of two NFAs  | 
         | 
    62 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { | 
    22   val Q = TState()  | 
    63   val Q = TState()  | 
    23   val new_delta : eNFAtrans =   | 
    64   val new_delta : eNFAtrans =   | 
    24     { case (Q, None) => enfa1.starts | enfa2.starts } | 
    65     { case (Q, None) => enfa1.starts | enfa2.starts } | 
    25   | 
         | 
    26   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)  | 
    66   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)  | 
    27   | 
    67   | 
    28   eNFA(Set(Q),   | 
    68   eNFA(Set(Q), new_delta +++ enfa1.delta +++ enfa2.delta, new_fins)  | 
    29        new_delta andAlso enfa1.delta andAlso enfa2.delta,   | 
         | 
    30        new_fins)  | 
         | 
    31 }  | 
    69 }  | 
    32   | 
    70   | 
    33 // sequence of two eNFAs  | 
    71 // star of a NFA  | 
    34 def eNFA_SEQ(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = { | 
    72 def NFA_STAR(enfa: NFAt) : NFAt = { | 
    35   val new_delta : eNFAtrans =   | 
         | 
    36     { case (q, None) if enfa1.fins(q) => enfa2.starts } | 
         | 
    37     | 
         | 
    38   eNFA(enfa1.starts,   | 
         | 
    39        new_delta andAlso enfa1.delta andAlso enfa2.delta,   | 
         | 
    40        enfa2.fins)  | 
         | 
    41 }  | 
         | 
    42   | 
         | 
    43 // star of an eNFA  | 
         | 
    44 def eNFA_STAR(enfa: NFA[TState, Char]) : NFA[TState, Char] = { | 
         | 
    45   val Q = TState()  | 
    73   val Q = TState()  | 
    46   val new_delta : eNFAtrans =   | 
    74   val new_delta : eNFAtrans =   | 
    47     { case (Q, None) => enfa.starts | 
    75     { case (Q, None) => enfa.starts | 
    48       case (q, None) if enfa.fins(q) => Set(Q) }  | 
    76       case (q, None) if enfa.fins(q) => Set(Q) }  | 
    49   | 
    77   | 
    50   eNFA(Set(Q), new_delta andAlso enfa.delta, Set(Q))  | 
    78   eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q))  | 
    51 }  | 
    79 }  | 
         | 
    80   | 
         | 
    81   | 
         | 
    82   | 
         | 
    83 // regular expressions  | 
         | 
    84 abstract class Rexp  | 
         | 
    85 case object ZERO extends Rexp                    // matches nothing  | 
         | 
    86 case object ONE extends Rexp                     // matches the empty string  | 
         | 
    87 case class CHAR(c: Char) extends Rexp            // matches a character c  | 
         | 
    88 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  // alternative  | 
         | 
    89 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // sequence  | 
         | 
    90 case class STAR(r: Rexp) extends Rexp            // star  | 
         | 
    91   | 
         | 
    92   | 
         | 
    93   | 
         | 
    94   | 
         | 
    95 // thompson construction   | 
         | 
    96 def thompson (r: Rexp) : NFAt = r match { | 
         | 
    97   case ZERO => NFA_ZERO()  | 
         | 
    98   case ONE => NFA_ONE()  | 
         | 
    99   case CHAR(c) => NFA_CHAR(c)    | 
         | 
   100   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))  | 
         | 
   101   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))  | 
         | 
   102   case STAR(r1) => NFA_STAR(thompson(r1))  | 
         | 
   103 }  | 
         | 
   104   | 
         | 
   105   | 
         | 
   106 def tmatches(r: Rexp, s: String) : Boolean =  | 
         | 
   107   thompson(r).accepts(s.toList)  | 
         | 
   108   | 
         | 
   109 def tmatches2(r: Rexp, s: String) : Boolean =  | 
         | 
   110   thompson(r).accepts2(s.toList)  | 
         | 
   111   | 
         | 
   112   | 
         | 
   113 //optional regular expression (one or zero times)  | 
         | 
   114 def OPT(r: Rexp) = ALT(r, ONE)  | 
         | 
   115   | 
         | 
   116 //n-times regular expression (explicitly expanded)  | 
         | 
   117 def NTIMES(r: Rexp, n: Int) : Rexp = n match { | 
         | 
   118   case 0 => ONE  | 
         | 
   119   case 1 => r  | 
         | 
   120   case n => SEQ(r, NTIMES(r, n - 1))  | 
         | 
   121 }  | 
         | 
   122   | 
         | 
   123   | 
         | 
   124 // Test Cases  | 
         | 
   125   | 
         | 
   126 // the evil regular expression  a?{n} a{n} | 
         | 
   127 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) | 
         | 
   128   | 
         | 
   129 // the evil regular expression (a*)*b  | 
         | 
   130 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) | 
         | 
   131   | 
         | 
   132 //for measuring time  | 
         | 
   133 def time_needed[T](i: Int, code: => T) = { | 
         | 
   134   val start = System.nanoTime()  | 
         | 
   135   for (j <- 1 to i) code  | 
         | 
   136   val end = System.nanoTime()  | 
         | 
   137   (end - start)/(i * 1.0e9)  | 
         | 
   138   | 
         | 
   139   | 
         | 
   140 // the size of the NFA can be large,   | 
         | 
   141 // thus slowing down the breadth-first search  | 
         | 
   142   | 
         | 
   143 for (i <- 1 to 10) { | 
         | 
   144   println(i + ": " + "%.5f".format(time_needed(2, tmatches(EVIL1(i), "a" * i))))  | 
         | 
   145 }  | 
         | 
   146   | 
         | 
   147 for (i <- 1 to 10) { | 
         | 
   148   println(i + " " + "%.5f".format(time_needed(2, tmatches(EVIL2, "a" * i))))  | 
         | 
   149 }  | 
         | 
   150   | 
         | 
   151   | 
         | 
   152 // the backtracking needed in depth-first search   | 
         | 
   153 // can be painfully slow  | 
         | 
   154   | 
         | 
   155 for (i <- 1 to 8) { | 
         | 
   156   println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i))))  | 
         | 
   157 }  |