progs/thompson.scala
changeset 491 d5776c6018f0
parent 489 e28d7a327870
child 521 95af9beb4b7f
equal deleted inserted replaced
490:4fee50f38305 491:d5776c6018f0
     1 // Thompson Construction
     1 // Thompson Construction
     2 // (needs  :load nfa.scala
     2 // (needs  :load dfa.scala
       
     3 //         :load nfa.scala
     3 //         :load enfa.scala)
     4 //         :load enfa.scala)
     4 
     5 
     5 
     6 
     6 // states for Thompson construction
     7 // states for Thompson construction
     7 case class TState(i: Int) extends State
     8 case class TState(i: Int) extends State
   117   thompson(r).accepts(s.toList)
   118   thompson(r).accepts(s.toList)
   118 
   119 
   119 def tmatches2(r: Rexp, s: String) : Boolean =
   120 def tmatches2(r: Rexp, s: String) : Boolean =
   120   thompson(r).accepts2(s.toList)
   121   thompson(r).accepts2(s.toList)
   121 
   122 
       
   123 // dfa via subset construction
       
   124 def tmatches_dfa(r: Rexp, s: String) : Boolean =
       
   125   subset(thompson(r)).accepts(s.toList)
   122 
   126 
   123 // Test Cases
   127 // Test Cases
   124 
   128 
   125 
   129 
   126 // the evil regular expression  a?{n} a{n}
   130 // the evil regular expression  a?{n} a{n}
   153 // can be painfully slow
   157 // can be painfully slow
   154 
   158 
   155 for (i <- 1 to 8) {
   159 for (i <- 1 to 8) {
   156   println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i))))
   160   println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i))))
   157 }
   161 }
       
   162 
       
   163 
       
   164 
       
   165 // while my thompson-enfa-subset-partial-function-chain
       
   166 // is probably not the most effcient way to obtain a fast DFA 
       
   167 // (the below should be much faster with a more direct construction),
       
   168 // in general the DFAs can be slow because of the state explosion
       
   169 // in the subset construction
       
   170 
       
   171 for (i <- 1 to 13) {
       
   172   println(i + ": " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL1(i), "a" * i))))
       
   173 }
       
   174 
       
   175 for (i <- 1 to 100 by 5) {
       
   176   println(i + " " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL2, "a" * i))))
       
   177 }