progs/thompson.scala
changeset 487 a697421eaa04
parent 486 8178fcf377dc
child 488 598741d39d21
equal deleted inserted replaced
486:8178fcf377dc 487:a697421eaa04
     1 // eNFA that does not accept any string
     1 // Thompson Construction
     2 def eNFA_ZERO(): NFA[TState, Char] = {
     2 // (needs  :load nfa.scala
       
     3 //         :load enfa.scala)
       
     4 
       
     5 
       
     6 // states for Thompson construction
       
     7 case class TState(i: Int) extends State
       
     8 
       
     9 object TState {
       
    10   var counter = 0
       
    11   
       
    12   def apply() : TState = {
       
    13     counter += 1;
       
    14     new TState(counter - 1)
       
    15   }
       
    16 }
       
    17 
       
    18 
       
    19 // some types abbreviations
       
    20 type NFAt = NFA[TState, Char]
       
    21 type NFAtrans = (TState, Char) :=> Set[TState]
       
    22 type eNFAtrans = (TState, Option[Char]) :=> Set[TState]
       
    23 
       
    24 
       
    25 // for composing an eNFA transition with a NFA transition
       
    26 implicit class RichPF(val f: eNFAtrans) extends AnyVal {
       
    27   def +++(g: NFAtrans) : eNFAtrans = 
       
    28   { case (q, None) =>  applyOrElse(f, (q, None)) 
       
    29     case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c))  }
       
    30 }
       
    31 
       
    32 
       
    33 // NFA that does not accept any string
       
    34 def NFA_ZERO(): NFAt = {
     3   val Q = TState()
    35   val Q = TState()
     4   NFA(Set(Q), { case _ => Set() }, Set())
    36   NFA(Set(Q), { case _ => Set() }, Set())
     5 }
    37 }
     6 
    38 
     7 // eNFA that accepts the empty string
    39 // NFA that accepts the empty string
     8 def eNFA_ONE() : NFA[TState, Char] = {
    40 def NFA_ONE() : NFAt = {
     9   val Q = TState()
    41   val Q = TState()
    10   NFA(Set(Q), { case _ => Set() }, Set(Q))
    42   NFA(Set(Q), { case _ => Set() }, Set(Q))
    11 }
    43 }
    12 
    44 
    13 // eNFA that accepts the string "c"
    45 // NFA that accepts the string "c"
    14 def eNFA_CHAR(c: Char) : NFA[TState, Char] = {
    46 def NFA_CHAR(c: Char) : NFAt = {
    15   val Q1 = TState()
    47   val Q1 = TState()
    16   val Q2 = TState()
    48   val Q2 = TState()
    17   NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2))
    49   NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2))
    18 }
    50 }
    19 
    51 
    20 // alternative of two eNFAs
    52 // sequence of two NFAs
    21 def eNFA_ALT(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = {
    53 def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = {
       
    54   val new_delta : eNFAtrans = 
       
    55     { case (q, None) if enfa1.fins(q) => enfa2.starts }
       
    56   
       
    57   eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, 
       
    58        enfa2.fins)
       
    59 }
       
    60 
       
    61 // alternative of two NFAs
       
    62 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = {
    22   val Q = TState()
    63   val Q = TState()
    23   val new_delta : eNFAtrans = 
    64   val new_delta : eNFAtrans = 
    24     { case (Q, None) => enfa1.starts | enfa2.starts }
    65     { case (Q, None) => enfa1.starts | enfa2.starts }
    25 
       
    26   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)
    66   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)
    27 
    67 
    28   eNFA(Set(Q), 
    68   eNFA(Set(Q), new_delta +++ enfa1.delta +++ enfa2.delta, new_fins)
    29        new_delta andAlso enfa1.delta andAlso enfa2.delta, 
       
    30        new_fins)
       
    31 }
    69 }
    32 
    70 
    33 // sequence of two eNFAs
    71 // star of a NFA
    34 def eNFA_SEQ(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = {
    72 def NFA_STAR(enfa: NFAt) : NFAt = {
    35   val new_delta : eNFAtrans = 
       
    36     { case (q, None) if enfa1.fins(q) => enfa2.starts }
       
    37   
       
    38   eNFA(enfa1.starts, 
       
    39        new_delta andAlso enfa1.delta andAlso enfa2.delta, 
       
    40        enfa2.fins)
       
    41 }
       
    42 
       
    43 // star of an eNFA
       
    44 def eNFA_STAR(enfa: NFA[TState, Char]) : NFA[TState, Char] = {
       
    45   val Q = TState()
    73   val Q = TState()
    46   val new_delta : eNFAtrans = 
    74   val new_delta : eNFAtrans = 
    47     { case (Q, None) => enfa.starts
    75     { case (Q, None) => enfa.starts
    48       case (q, None) if enfa.fins(q) => Set(Q) }
    76       case (q, None) if enfa.fins(q) => Set(Q) }
    49 
    77 
    50   eNFA(Set(Q), new_delta andAlso enfa.delta, Set(Q))
    78   eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q))
    51 }
    79 }
       
    80 
       
    81 
       
    82 
       
    83 // regular expressions
       
    84 abstract class Rexp
       
    85 case object ZERO extends Rexp                    // matches nothing
       
    86 case object ONE extends Rexp                     // matches the empty string
       
    87 case class CHAR(c: Char) extends Rexp            // matches a character c
       
    88 case class ALT(r1: Rexp, r2: Rexp) extends Rexp  // alternative
       
    89 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // sequence
       
    90 case class STAR(r: Rexp) extends Rexp            // star
       
    91 
       
    92 
       
    93 
       
    94 
       
    95 // thompson construction 
       
    96 def thompson (r: Rexp) : NFAt = r match {
       
    97   case ZERO => NFA_ZERO()
       
    98   case ONE => NFA_ONE()
       
    99   case CHAR(c) => NFA_CHAR(c)  
       
   100   case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2))
       
   101   case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2))
       
   102   case STAR(r1) => NFA_STAR(thompson(r1))
       
   103 }
       
   104 
       
   105 
       
   106 def tmatches(r: Rexp, s: String) : Boolean =
       
   107   thompson(r).accepts(s.toList)
       
   108 
       
   109 def tmatches2(r: Rexp, s: String) : Boolean =
       
   110   thompson(r).accepts2(s.toList)
       
   111 
       
   112 
       
   113 //optional regular expression (one or zero times)
       
   114 def OPT(r: Rexp) = ALT(r, ONE)
       
   115 
       
   116 //n-times regular expression (explicitly expanded)
       
   117 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
       
   118   case 0 => ONE
       
   119   case 1 => r
       
   120   case n => SEQ(r, NTIMES(r, n - 1))
       
   121 }
       
   122 
       
   123 
       
   124 // Test Cases
       
   125 
       
   126 // the evil regular expression  a?{n} a{n}
       
   127 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
       
   128 
       
   129 // the evil regular expression (a*)*b
       
   130 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
       
   131 
       
   132 //for measuring time
       
   133 def time_needed[T](i: Int, code: => T) = {
       
   134   val start = System.nanoTime()
       
   135   for (j <- 1 to i) code
       
   136   val end = System.nanoTime()
       
   137   (end - start)/(i * 1.0e9)
       
   138 
       
   139 
       
   140 // the size of the NFA can be large, 
       
   141 // thus slowing down the breadth-first search
       
   142 
       
   143 for (i <- 1 to 10) {
       
   144   println(i + ": " + "%.5f".format(time_needed(2, tmatches(EVIL1(i), "a" * i))))
       
   145 }
       
   146 
       
   147 for (i <- 1 to 10) {
       
   148   println(i + " " + "%.5f".format(time_needed(2, tmatches(EVIL2, "a" * i))))
       
   149 }
       
   150 
       
   151 
       
   152 // the backtracking needed in depth-first search 
       
   153 // can be painfully slow
       
   154 
       
   155 for (i <- 1 to 8) {
       
   156   println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i))))
       
   157 }