1 // eNFA that does not accept any string |
1 // Thompson Construction |
2 def eNFA_ZERO(): NFA[TState, Char] = { |
2 // (needs :load nfa.scala |
|
3 // :load enfa.scala) |
|
4 |
|
5 |
|
6 // states for Thompson construction |
|
7 case class TState(i: Int) extends State |
|
8 |
|
9 object TState { |
|
10 var counter = 0 |
|
11 |
|
12 def apply() : TState = { |
|
13 counter += 1; |
|
14 new TState(counter - 1) |
|
15 } |
|
16 } |
|
17 |
|
18 |
|
19 // some types abbreviations |
|
20 type NFAt = NFA[TState, Char] |
|
21 type NFAtrans = (TState, Char) :=> Set[TState] |
|
22 type eNFAtrans = (TState, Option[Char]) :=> Set[TState] |
|
23 |
|
24 |
|
25 // for composing an eNFA transition with a NFA transition |
|
26 implicit class RichPF(val f: eNFAtrans) extends AnyVal { |
|
27 def +++(g: NFAtrans) : eNFAtrans = |
|
28 { case (q, None) => applyOrElse(f, (q, None)) |
|
29 case (q, Some(c)) => applyOrElse(f, (q, Some(c))) | applyOrElse(g, (q, c)) } |
|
30 } |
|
31 |
|
32 |
|
33 // NFA that does not accept any string |
|
34 def NFA_ZERO(): NFAt = { |
3 val Q = TState() |
35 val Q = TState() |
4 NFA(Set(Q), { case _ => Set() }, Set()) |
36 NFA(Set(Q), { case _ => Set() }, Set()) |
5 } |
37 } |
6 |
38 |
7 // eNFA that accepts the empty string |
39 // NFA that accepts the empty string |
8 def eNFA_ONE() : NFA[TState, Char] = { |
40 def NFA_ONE() : NFAt = { |
9 val Q = TState() |
41 val Q = TState() |
10 NFA(Set(Q), { case _ => Set() }, Set(Q)) |
42 NFA(Set(Q), { case _ => Set() }, Set(Q)) |
11 } |
43 } |
12 |
44 |
13 // eNFA that accepts the string "c" |
45 // NFA that accepts the string "c" |
14 def eNFA_CHAR(c: Char) : NFA[TState, Char] = { |
46 def NFA_CHAR(c: Char) : NFAt = { |
15 val Q1 = TState() |
47 val Q1 = TState() |
16 val Q2 = TState() |
48 val Q2 = TState() |
17 NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) |
49 NFA(Set(Q1), { case (Q1, d) if (c == d) => Set(Q2) }, Set(Q2)) |
18 } |
50 } |
19 |
51 |
20 // alternative of two eNFAs |
52 // sequence of two NFAs |
21 def eNFA_ALT(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = { |
53 def NFA_SEQ(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
|
54 val new_delta : eNFAtrans = |
|
55 { case (q, None) if enfa1.fins(q) => enfa2.starts } |
|
56 |
|
57 eNFA(enfa1.starts, new_delta +++ enfa1.delta +++ enfa2.delta, |
|
58 enfa2.fins) |
|
59 } |
|
60 |
|
61 // alternative of two NFAs |
|
62 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = { |
22 val Q = TState() |
63 val Q = TState() |
23 val new_delta : eNFAtrans = |
64 val new_delta : eNFAtrans = |
24 { case (Q, None) => enfa1.starts | enfa2.starts } |
65 { case (Q, None) => enfa1.starts | enfa2.starts } |
25 |
|
26 val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) |
66 val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q) |
27 |
67 |
28 eNFA(Set(Q), |
68 eNFA(Set(Q), new_delta +++ enfa1.delta +++ enfa2.delta, new_fins) |
29 new_delta andAlso enfa1.delta andAlso enfa2.delta, |
|
30 new_fins) |
|
31 } |
69 } |
32 |
70 |
33 // sequence of two eNFAs |
71 // star of a NFA |
34 def eNFA_SEQ(enfa1: NFA[TState, Char], enfa2: NFA[TState, Char]) : NFA[TState, Char] = { |
72 def NFA_STAR(enfa: NFAt) : NFAt = { |
35 val new_delta : eNFAtrans = |
|
36 { case (q, None) if enfa1.fins(q) => enfa2.starts } |
|
37 |
|
38 eNFA(enfa1.starts, |
|
39 new_delta andAlso enfa1.delta andAlso enfa2.delta, |
|
40 enfa2.fins) |
|
41 } |
|
42 |
|
43 // star of an eNFA |
|
44 def eNFA_STAR(enfa: NFA[TState, Char]) : NFA[TState, Char] = { |
|
45 val Q = TState() |
73 val Q = TState() |
46 val new_delta : eNFAtrans = |
74 val new_delta : eNFAtrans = |
47 { case (Q, None) => enfa.starts |
75 { case (Q, None) => enfa.starts |
48 case (q, None) if enfa.fins(q) => Set(Q) } |
76 case (q, None) if enfa.fins(q) => Set(Q) } |
49 |
77 |
50 eNFA(Set(Q), new_delta andAlso enfa.delta, Set(Q)) |
78 eNFA(Set(Q), new_delta +++ enfa.delta, Set(Q)) |
51 } |
79 } |
|
80 |
|
81 |
|
82 |
|
83 // regular expressions |
|
84 abstract class Rexp |
|
85 case object ZERO extends Rexp // matches nothing |
|
86 case object ONE extends Rexp // matches the empty string |
|
87 case class CHAR(c: Char) extends Rexp // matches a character c |
|
88 case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative |
|
89 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence |
|
90 case class STAR(r: Rexp) extends Rexp // star |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 // thompson construction |
|
96 def thompson (r: Rexp) : NFAt = r match { |
|
97 case ZERO => NFA_ZERO() |
|
98 case ONE => NFA_ONE() |
|
99 case CHAR(c) => NFA_CHAR(c) |
|
100 case ALT(r1, r2) => NFA_ALT(thompson(r1), thompson(r2)) |
|
101 case SEQ(r1, r2) => NFA_SEQ(thompson(r1), thompson(r2)) |
|
102 case STAR(r1) => NFA_STAR(thompson(r1)) |
|
103 } |
|
104 |
|
105 |
|
106 def tmatches(r: Rexp, s: String) : Boolean = |
|
107 thompson(r).accepts(s.toList) |
|
108 |
|
109 def tmatches2(r: Rexp, s: String) : Boolean = |
|
110 thompson(r).accepts2(s.toList) |
|
111 |
|
112 |
|
113 //optional regular expression (one or zero times) |
|
114 def OPT(r: Rexp) = ALT(r, ONE) |
|
115 |
|
116 //n-times regular expression (explicitly expanded) |
|
117 def NTIMES(r: Rexp, n: Int) : Rexp = n match { |
|
118 case 0 => ONE |
|
119 case 1 => r |
|
120 case n => SEQ(r, NTIMES(r, n - 1)) |
|
121 } |
|
122 |
|
123 |
|
124 // Test Cases |
|
125 |
|
126 // the evil regular expression a?{n} a{n} |
|
127 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) |
|
128 |
|
129 // the evil regular expression (a*)*b |
|
130 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
|
131 |
|
132 //for measuring time |
|
133 def time_needed[T](i: Int, code: => T) = { |
|
134 val start = System.nanoTime() |
|
135 for (j <- 1 to i) code |
|
136 val end = System.nanoTime() |
|
137 (end - start)/(i * 1.0e9) |
|
138 |
|
139 |
|
140 // the size of the NFA can be large, |
|
141 // thus slowing down the breadth-first search |
|
142 |
|
143 for (i <- 1 to 10) { |
|
144 println(i + ": " + "%.5f".format(time_needed(2, tmatches(EVIL1(i), "a" * i)))) |
|
145 } |
|
146 |
|
147 for (i <- 1 to 10) { |
|
148 println(i + " " + "%.5f".format(time_needed(2, tmatches(EVIL2, "a" * i)))) |
|
149 } |
|
150 |
|
151 |
|
152 // the backtracking needed in depth-first search |
|
153 // can be painfully slow |
|
154 |
|
155 for (i <- 1 to 8) { |
|
156 println(i + " " + "%.5f".format(time_needed(2, tmatches2(EVIL2, "a" * i)))) |
|
157 } |