1
|
/*
|
2
|
Copyright (c) 2003-2006 Niels Kokholm and Peter Sestoft
|
3
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
of this software and associated documentation files (the "Software"), to deal
|
5
|
in the Software without restriction, including without limitation the rights
|
6
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
copies of the Software, and to permit persons to whom the Software is
|
8
|
furnished to do so, subject to the following conditions:
|
9
|
|
10
|
The above copyright notice and this permission notice shall be included in
|
11
|
all copies or substantial portions of the Software.
|
12
|
|
13
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
SOFTWARE.
|
20
|
*/
|
21
|
|
22
|
// Compile with
|
23
|
// csc /r:C5.dll GNfaToDfa.cs
|
24
|
|
25
|
// C5 examples: RegExp -> NFA -> DFA -> Graph
|
26
|
// Java 2000-10-07, GC# 2001-10-23, C# 2.0 2003-09-03, C# 2.0+C5 2004-08-08
|
27
|
|
28
|
// This file contains, in order:
|
29
|
// * Helper class Set<T> defined in terms of C5 classes.
|
30
|
// * A class Nfa for representing an NFA (a nondeterministic finite
|
31
|
// automaton), and for converting it to a DFA (a deterministic
|
32
|
// finite automaton). Most complexity is in this class.
|
33
|
// * A class Dfa for representing a DFA, a deterministic finite
|
34
|
// automaton, and for writing a dot input file representing the DFA.
|
35
|
// * Classes for representing regular expressions, and for building an
|
36
|
// NFA from a regular expression
|
37
|
// * A test class that creates an NFA, a DFA, and a dot input file
|
38
|
// for a number of small regular expressions. The DFAs are
|
39
|
// not minimized.
|
40
|
|
41
|
using System;
|
42
|
using System.Text;
|
43
|
using System.IO;
|
44
|
using C5;
|
45
|
using SCG = System.Collections.Generic;
|
46
|
|
47
|
namespace GNfaToDfa
|
48
|
{
|
49
|
|
50
|
public class Set<T> : HashSet<T> {
|
51
|
public Set(SCG.IEnumerable<T> enm) : base() {
|
52
|
AddAll(enm);
|
53
|
}
|
54
|
|
55
|
public Set(params T[] elems) : this((SCG.IEnumerable<T>)elems) { }
|
56
|
|
57
|
// Set union (+), difference (-), and intersection (*):
|
58
|
|
59
|
public static Set<T> operator +(Set<T> s1, Set<T> s2) {
|
60
|
if (s1 == null || s2 == null)
|
61
|
throw new ArgumentNullException("Set+Set");
|
62
|
else {
|
63
|
Set<T> res = new Set<T>(s1);
|
64
|
res.AddAll(s2);
|
65
|
return res;
|
66
|
}
|
67
|
}
|
68
|
|
69
|
public static Set<T> operator -(Set<T> s1, Set<T> s2) {
|
70
|
if (s1 == null || s2 == null)
|
71
|
throw new ArgumentNullException("Set-Set");
|
72
|
else {
|
73
|
Set<T> res = new Set<T>(s1);
|
74
|
res.RemoveAll(s2);
|
75
|
return res;
|
76
|
}
|
77
|
}
|
78
|
|
79
|
public static Set<T> operator *(Set<T> s1, Set<T> s2) {
|
80
|
if (s1 == null || s2 == null)
|
81
|
throw new ArgumentNullException("Set*Set");
|
82
|
else {
|
83
|
Set<T> res = new Set<T>(s1);
|
84
|
res.RetainAll(s2);
|
85
|
return res;
|
86
|
}
|
87
|
}
|
88
|
|
89
|
// Equality of sets; take care to avoid infinite loops
|
90
|
|
91
|
public static bool operator ==(Set<T> s1, Set<T> s2) {
|
92
|
return EqualityComparer<Set<T>>.Default.Equals(s1, s2);
|
93
|
}
|
94
|
|
95
|
public static bool operator !=(Set<T> s1, Set<T> s2) {
|
96
|
return !(s1 == s2);
|
97
|
}
|
98
|
|
99
|
public override bool Equals(Object that) {
|
100
|
return this == (that as Set<T>);
|
101
|
}
|
102
|
|
103
|
public override int GetHashCode() {
|
104
|
return EqualityComparer<Set<T>>.Default.GetHashCode(this);
|
105
|
}
|
106
|
|
107
|
// Subset (<=) and superset (>=) relation:
|
108
|
|
109
|
public static bool operator <=(Set<T> s1, Set<T> s2) {
|
110
|
if (s1 == null || s2 == null)
|
111
|
throw new ArgumentNullException("Set<=Set");
|
112
|
else
|
113
|
return s1.ContainsAll(s2);
|
114
|
}
|
115
|
|
116
|
public static bool operator >=(Set<T> s1, Set<T> s2) {
|
117
|
if (s1 == null || s2 == null)
|
118
|
throw new ArgumentNullException("Set>=Set");
|
119
|
else
|
120
|
return s2.ContainsAll(s1);
|
121
|
}
|
122
|
|
123
|
public override String ToString() {
|
124
|
StringBuilder sb = new StringBuilder();
|
125
|
sb.Append("{");
|
126
|
bool first = true;
|
127
|
foreach (T x in this) {
|
128
|
if (!first)
|
129
|
sb.Append(",");
|
130
|
sb.Append(x);
|
131
|
first = false;
|
132
|
}
|
133
|
sb.Append("}");
|
134
|
return sb.ToString();
|
135
|
}
|
136
|
}
|
137
|
|
138
|
// ----------------------------------------------------------------------
|
139
|
|
140
|
// Regular expressions, NFAs, DFAs, and dot graphs
|
141
|
// sestoft@dina.kvl.dk *
|
142
|
// Java 2001-07-10 * C# 2001-10-22 * Gen C# 2001-10-23, 2003-09-03
|
143
|
|
144
|
// In the Generic C# 2.0 version we
|
145
|
// use Queue<int> and Queue<Set<int>> for worklists
|
146
|
// use Set<int> for pre-DFA states
|
147
|
// use ArrayList<Transition> for NFA transition relations
|
148
|
// use HashDictionary<Set<int>, HashDictionary<String, Set<int>>>
|
149
|
// and HashDictionary<int, HashDictionary<String, int>> for DFA transition relations
|
150
|
|
151
|
/* Class Nfa and conversion from NFA to DFA ---------------------------
|
152
|
|
153
|
A nondeterministic finite automaton (NFA) is represented as a
|
154
|
dictionary mapping a state number (int) to an arraylist of
|
155
|
Transitions, a Transition being a pair of a label lab (a string,
|
156
|
null meaning epsilon) and a target state (an int).
|
157
|
|
158
|
A DFA is created from an NFA in two steps:
|
159
|
|
160
|
(1) Construct a DFA whose each of whose states is composite,
|
161
|
namely a set of NFA states (Set of int). This is done by
|
162
|
methods CompositeDfaTrans and EpsilonClose.
|
163
|
|
164
|
(2) Replace composite states (Set of int) by simple states
|
165
|
(int). This is done by methods Rename and MkRenamer.
|
166
|
|
167
|
Method CompositeDfaTrans works as follows:
|
168
|
|
169
|
Create the epsilon-closure S0 (a Set of ints) of the start state
|
170
|
s0, and put it in a worklist (a Queue). Create an empty DFA
|
171
|
transition relation, which is a dictionary mapping a composite
|
172
|
state (an epsilon-closed set of ints) to a dictionary mapping a
|
173
|
label (a non-null string) to a composite state.
|
174
|
|
175
|
Repeatedly choose a composite state S from the worklist. If it is
|
176
|
not already in the keyset of the DFA transition relation, compute
|
177
|
for every non-epsilon label lab the set T of states reachable by
|
178
|
that label from some state s in S. Compute the epsilon-closure
|
179
|
Tclose of every such state T and put it on the worklist. Then add
|
180
|
the transition S -lab-> Tclose to the DFA transition relation, for
|
181
|
every lab.
|
182
|
|
183
|
Method EpsilonClose works as follows:
|
184
|
|
185
|
Given a set S of states. Put the states of S in a worklist.
|
186
|
Repeatedly choose a state s from the worklist, and consider all
|
187
|
epsilon-transitions s -eps-> s' from s. If s' is in S already,
|
188
|
then do nothing; otherwise add s' to S and the worklist. When the
|
189
|
worklist is empty, S is epsilon-closed; return S.
|
190
|
|
191
|
Method MkRenamer works as follows:
|
192
|
|
193
|
Given a dictionary mapping a set of int to something, create an
|
194
|
injective dictionary mapping from set of int to int, by choosing a
|
195
|
fresh int for every key in the given dictionary.
|
196
|
|
197
|
Method Rename works as follows:
|
198
|
|
199
|
Given a dictionary mapping a set of int to a dictionary mapping a
|
200
|
string to set of int, use the result of MkRenamer to replace all
|
201
|
sets of ints by ints.
|
202
|
|
203
|
*/
|
204
|
|
205
|
class Nfa {
|
206
|
private readonly int startState;
|
207
|
private readonly int exitState; // This is the unique accept state
|
208
|
private readonly IDictionary<int, ArrayList<Transition>> trans;
|
209
|
|
210
|
public Nfa(int startState, int exitState) {
|
211
|
this.startState = startState; this.exitState = exitState;
|
212
|
trans = new HashDictionary<int, ArrayList<Transition>>();
|
213
|
if (!startState.Equals(exitState))
|
214
|
trans.Add(exitState, new ArrayList<Transition>());
|
215
|
}
|
216
|
|
217
|
public int Start { get { return startState; } }
|
218
|
|
219
|
public int Exit { get { return exitState; } }
|
220
|
|
221
|
public IDictionary<int, ArrayList<Transition>> Trans {
|
222
|
get { return trans; }
|
223
|
}
|
224
|
|
225
|
public void AddTrans(int s1, String lab, int s2) {
|
226
|
ArrayList<Transition> s1Trans;
|
227
|
if (trans.Contains(s1))
|
228
|
s1Trans = trans[s1];
|
229
|
else {
|
230
|
s1Trans = new ArrayList<Transition>();
|
231
|
trans.Add(s1, s1Trans);
|
232
|
}
|
233
|
s1Trans.Add(new Transition(lab, s2));
|
234
|
}
|
235
|
|
236
|
public void AddTrans(KeyValuePair<int, ArrayList<Transition>> tr) {
|
237
|
// Assumption: if tr is in trans, it maps to an empty list (end state)
|
238
|
trans.Remove(tr.Key);
|
239
|
trans.Add(tr.Key, tr.Value);
|
240
|
}
|
241
|
|
242
|
public override String ToString() {
|
243
|
return "NFA start=" + startState + " exit=" + exitState;
|
244
|
}
|
245
|
|
246
|
// Construct the transition relation of a composite-state DFA from
|
247
|
// an NFA with start state s0 and transition relation trans (a
|
248
|
// dictionary mapping int to arraylist of Transition). The start
|
249
|
// state of the constructed DFA is the epsilon closure of s0, and
|
250
|
// its transition relation is a dictionary mapping a composite state
|
251
|
// (a set of ints) to a dictionary mapping a label (a string) to a
|
252
|
// composite state (a set of ints).
|
253
|
|
254
|
static IDictionary<Set<int>, IDictionary<String, Set<int>>>
|
255
|
CompositeDfaTrans(int s0, IDictionary<int, ArrayList<Transition>> trans) {
|
256
|
Set<int> S0 = EpsilonClose(new Set<int>(s0), trans);
|
257
|
IQueue<Set<int>> worklist = new CircularQueue<Set<int>>();
|
258
|
worklist.Enqueue(S0);
|
259
|
// The transition relation of the DFA
|
260
|
IDictionary<Set<int>, IDictionary<String, Set<int>>> res =
|
261
|
new HashDictionary<Set<int>, IDictionary<String, Set<int>>>();
|
262
|
while (!worklist.IsEmpty) {
|
263
|
Set<int> S = worklist.Dequeue();
|
264
|
if (!res.Contains(S)) {
|
265
|
// The S -lab-> T transition relation being constructed for a given S
|
266
|
IDictionary<String, Set<int>> STrans =
|
267
|
new HashDictionary<String, Set<int>>();
|
268
|
// For all s in S, consider all transitions s -lab-> t
|
269
|
foreach (int s in S) {
|
270
|
// For all non-epsilon transitions s -lab-> t, add t to T
|
271
|
foreach (Transition tr in trans[s]) {
|
272
|
if (tr.lab != null) { // Non-epsilon transition
|
273
|
Set<int> toState;
|
274
|
if (STrans.Contains(tr.lab)) // Already a transition on lab
|
275
|
toState = STrans[tr.lab];
|
276
|
else { // No transitions on lab yet
|
277
|
toState = new Set<int>();
|
278
|
STrans.Add(tr.lab, toState);
|
279
|
}
|
280
|
toState.Add(tr.target);
|
281
|
}
|
282
|
}
|
283
|
}
|
284
|
// Epsilon-close all T such that S -lab-> T, and put on worklist
|
285
|
IDictionary<String, Set<int>> STransClosed =
|
286
|
new HashDictionary<String, Set<int>>();
|
287
|
foreach (KeyValuePair<String, Set<int>> entry in STrans) {
|
288
|
Set<int> Tclose = EpsilonClose(entry.Value, trans);
|
289
|
STransClosed.Add(entry.Key, Tclose);
|
290
|
worklist.Enqueue(Tclose);
|
291
|
}
|
292
|
res.Add(S, STransClosed);
|
293
|
}
|
294
|
}
|
295
|
return res;
|
296
|
}
|
297
|
|
298
|
// Compute epsilon-closure of state set S in transition relation trans.
|
299
|
|
300
|
static Set<int>
|
301
|
EpsilonClose(Set<int> S, IDictionary<int, ArrayList<Transition>> trans) {
|
302
|
// The worklist initially contains all S members
|
303
|
IQueue<int> worklist = new CircularQueue<int>();
|
304
|
S.Apply(worklist.Enqueue);
|
305
|
Set<int> res = new Set<int>(S);
|
306
|
while (!worklist.IsEmpty) {
|
307
|
int s = worklist.Dequeue();
|
308
|
foreach (Transition tr in trans[s]) {
|
309
|
if (tr.lab == null && !res.Contains(tr.target)) {
|
310
|
res.Add(tr.target);
|
311
|
worklist.Enqueue(tr.target);
|
312
|
}
|
313
|
}
|
314
|
}
|
315
|
return res;
|
316
|
}
|
317
|
|
318
|
// Compute a renamer, which is a dictionary mapping set of int to int
|
319
|
|
320
|
static IDictionary<Set<int>, int> MkRenamer(ICollectionValue<Set<int>> states)
|
321
|
{
|
322
|
IDictionary<Set<int>, int> renamer = new HashDictionary<Set<int>, int>();
|
323
|
int count = 0;
|
324
|
foreach (Set<int> k in states)
|
325
|
renamer.Add(k, count++);
|
326
|
return renamer;
|
327
|
}
|
328
|
|
329
|
// Using a renamer (a dictionary mapping set of int to int), replace
|
330
|
// composite (set of int) states with simple (int) states in the
|
331
|
// transition relation trans, which is a dictionary mapping set of
|
332
|
// int to a dictionary mapping from string to set of int. The
|
333
|
// result is a dictionary mapping from int to a dictionary mapping
|
334
|
// from string to int.
|
335
|
|
336
|
static IDictionary<int, IDictionary<String, int>>
|
337
|
Rename(IDictionary<Set<int>, int> renamer,
|
338
|
IDictionary<Set<int>, IDictionary<String, Set<int>>> trans)
|
339
|
{
|
340
|
IDictionary<int, IDictionary<String, int>> newtrans =
|
341
|
new HashDictionary<int, IDictionary<String, int>>();
|
342
|
foreach (KeyValuePair<Set<int>, IDictionary<String, Set<int>>> entry
|
343
|
in trans) {
|
344
|
Set<int> k = entry.Key;
|
345
|
IDictionary<String, int> newktrans = new HashDictionary<String, int>();
|
346
|
foreach (KeyValuePair<String, Set<int>> tr in entry.Value)
|
347
|
newktrans.Add(tr.Key, renamer[tr.Value]);
|
348
|
newtrans.Add(renamer[k], newktrans);
|
349
|
}
|
350
|
return newtrans;
|
351
|
}
|
352
|
|
353
|
static Set<int> AcceptStates(ICollectionValue<Set<int>> states,
|
354
|
IDictionary<Set<int>, int> renamer,
|
355
|
int exit)
|
356
|
{
|
357
|
Set<int> acceptStates = new Set<int>();
|
358
|
foreach (Set<int> state in states)
|
359
|
if (state.Contains(exit))
|
360
|
acceptStates.Add(renamer[state]);
|
361
|
return acceptStates;
|
362
|
}
|
363
|
|
364
|
public Dfa ToDfa() {
|
365
|
IDictionary<Set<int>, IDictionary<String, Set<int>>>
|
366
|
cDfaTrans = CompositeDfaTrans(startState, trans);
|
367
|
Set<int> cDfaStart = EpsilonClose(new Set<int>(startState), trans);
|
368
|
ICollectionValue<Set<int>> cDfaStates = cDfaTrans.Keys;
|
369
|
IDictionary<Set<int>, int> renamer = MkRenamer(cDfaStates);
|
370
|
IDictionary<int, IDictionary<String, int>> simpleDfaTrans =
|
371
|
Rename(renamer, cDfaTrans);
|
372
|
int simpleDfaStart = renamer[cDfaStart];
|
373
|
Set<int> simpleDfaAccept = AcceptStates(cDfaStates, renamer, exitState);
|
374
|
return new Dfa(simpleDfaStart, simpleDfaAccept, simpleDfaTrans);
|
375
|
}
|
376
|
|
377
|
// Nested class for creating distinctly named states when constructing NFAs
|
378
|
|
379
|
public class NameSource {
|
380
|
private static int nextName = 0;
|
381
|
|
382
|
public int next() {
|
383
|
return nextName++;
|
384
|
}
|
385
|
}
|
386
|
|
387
|
// Write an input file for the dot program. You can find dot at
|
388
|
// http://www.research.att.com/sw/tools/graphviz/
|
389
|
|
390
|
public void WriteDot(String filename) {
|
391
|
TextWriter wr =
|
392
|
new StreamWriter(new FileStream(filename, FileMode.Create,
|
393
|
FileAccess.Write));
|
394
|
wr.WriteLine("// Format this file as a Postscript file with ");
|
395
|
wr.WriteLine("// dot " + filename + " -Tps -o out.ps\n");
|
396
|
wr.WriteLine("digraph nfa {");
|
397
|
wr.WriteLine("size=\"11,8.25\";");
|
398
|
wr.WriteLine("rotate=90;");
|
399
|
wr.WriteLine("rankdir=LR;");
|
400
|
wr.WriteLine("start [style=invis];"); // Invisible start node
|
401
|
wr.WriteLine("start -> d" + startState); // Edge into start state
|
402
|
|
403
|
// The accept state has a double circle
|
404
|
wr.WriteLine("d" + exitState + " [peripheries=2];");
|
405
|
|
406
|
// The transitions
|
407
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in trans) {
|
408
|
int s1 = entry.Key;
|
409
|
foreach (Transition s1Trans in entry.Value) {
|
410
|
String lab = s1Trans.lab ?? "eps";
|
411
|
int s2 = s1Trans.target;
|
412
|
wr.WriteLine("d" + s1 + " -> d" + s2 + " [label=\"" + lab + "\"];");
|
413
|
}
|
414
|
}
|
415
|
wr.WriteLine("}");
|
416
|
wr.Close();
|
417
|
}
|
418
|
}
|
419
|
|
420
|
// Class Transition, a transition from one state to another ----------
|
421
|
|
422
|
public class Transition {
|
423
|
public readonly String lab;
|
424
|
public readonly int target;
|
425
|
|
426
|
public Transition(String lab, int target) {
|
427
|
this.lab = lab; this.target = target;
|
428
|
}
|
429
|
|
430
|
public override String ToString() {
|
431
|
return "-" + lab + "-> " + target;
|
432
|
}
|
433
|
}
|
434
|
|
435
|
// Class Dfa, deterministic finite automata --------------------------
|
436
|
|
437
|
/*
|
438
|
A deterministic finite automaton (DFA) is represented as a
|
439
|
dictionary mapping state number (int) to a dictionary mapping label
|
440
|
(a non-null string) to a target state (an int).
|
441
|
*/
|
442
|
|
443
|
class Dfa {
|
444
|
private readonly int startState;
|
445
|
private readonly Set<int> acceptStates;
|
446
|
private readonly IDictionary<int, IDictionary<String, int>> trans;
|
447
|
|
448
|
public Dfa(int startState, Set<int> acceptStates,
|
449
|
IDictionary<int, IDictionary<String, int>> trans)
|
450
|
{
|
451
|
this.startState = startState;
|
452
|
this.acceptStates = acceptStates;
|
453
|
this.trans = trans;
|
454
|
}
|
455
|
|
456
|
public int Start { get { return startState; } }
|
457
|
|
458
|
public Set<int> Accept { get { return acceptStates; } }
|
459
|
|
460
|
public IDictionary<int, IDictionary<String, int>> Trans {
|
461
|
get { return trans; }
|
462
|
}
|
463
|
|
464
|
public override String ToString() {
|
465
|
return "DFA start=" + startState + "\naccept=" + acceptStates;
|
466
|
}
|
467
|
|
468
|
// Write an input file for the dot program. You can find dot at
|
469
|
// http://www.research.att.com/sw/tools/graphviz/
|
470
|
|
471
|
public void WriteDot(String filename) {
|
472
|
TextWriter wr =
|
473
|
new StreamWriter(new FileStream(filename, FileMode.Create,
|
474
|
FileAccess.Write));
|
475
|
wr.WriteLine("// Format this file as a Postscript file with ");
|
476
|
wr.WriteLine("// dot " + filename + " -Tps -o out.ps\n");
|
477
|
wr.WriteLine("digraph dfa {");
|
478
|
wr.WriteLine("size=\"11,8.25\";");
|
479
|
wr.WriteLine("rotate=90;");
|
480
|
wr.WriteLine("rankdir=LR;");
|
481
|
wr.WriteLine("start [style=invis];"); // Invisible start node
|
482
|
wr.WriteLine("start -> d" + startState); // Edge into start state
|
483
|
|
484
|
// Accept states are double circles
|
485
|
foreach (int state in trans.Keys)
|
486
|
if (acceptStates.Contains(state))
|
487
|
wr.WriteLine("d" + state + " [peripheries=2];");
|
488
|
|
489
|
// The transitions
|
490
|
foreach (KeyValuePair<int, IDictionary<String, int>> entry in trans) {
|
491
|
int s1 = entry.Key;
|
492
|
foreach (KeyValuePair<String, int> s1Trans in entry.Value) {
|
493
|
String lab = s1Trans.Key;
|
494
|
int s2 = s1Trans.Value;
|
495
|
wr.WriteLine("d" + s1 + " -> d" + s2 + " [label=\"" + lab + "\"];");
|
496
|
}
|
497
|
}
|
498
|
wr.WriteLine("}");
|
499
|
wr.Close();
|
500
|
}
|
501
|
}
|
502
|
|
503
|
// Regular expressions ----------------------------------------------
|
504
|
//
|
505
|
// Abstract syntax of regular expressions
|
506
|
// r ::= A | r1 r2 | (r1|r2) | r*
|
507
|
//
|
508
|
|
509
|
abstract class Regex {
|
510
|
abstract public Nfa MkNfa(Nfa.NameSource names);
|
511
|
}
|
512
|
|
513
|
class Eps : Regex {
|
514
|
// The resulting nfa0 has form s0s -eps-> s0e
|
515
|
|
516
|
public override Nfa MkNfa(Nfa.NameSource names) {
|
517
|
int s0s = names.next();
|
518
|
int s0e = names.next();
|
519
|
Nfa nfa0 = new Nfa(s0s, s0e);
|
520
|
nfa0.AddTrans(s0s, null, s0e);
|
521
|
return nfa0;
|
522
|
}
|
523
|
}
|
524
|
|
525
|
class Sym : Regex {
|
526
|
String sym;
|
527
|
|
528
|
public Sym(String sym) {
|
529
|
this.sym = sym;
|
530
|
}
|
531
|
|
532
|
// The resulting nfa0 has form s0s -sym-> s0e
|
533
|
|
534
|
public override Nfa MkNfa(Nfa.NameSource names) {
|
535
|
int s0s = names.next();
|
536
|
int s0e = names.next();
|
537
|
Nfa nfa0 = new Nfa(s0s, s0e);
|
538
|
nfa0.AddTrans(s0s, sym, s0e);
|
539
|
return nfa0;
|
540
|
}
|
541
|
}
|
542
|
|
543
|
class Seq : Regex {
|
544
|
Regex r1, r2;
|
545
|
|
546
|
public Seq(Regex r1, Regex r2) {
|
547
|
this.r1 = r1; this.r2 = r2;
|
548
|
}
|
549
|
|
550
|
// If nfa1 has form s1s ----> s1e
|
551
|
// and nfa2 has form s2s ----> s2e
|
552
|
// then nfa0 has form s1s ----> s1e -eps-> s2s ----> s2e
|
553
|
|
554
|
public override Nfa MkNfa(Nfa.NameSource names) {
|
555
|
Nfa nfa1 = r1.MkNfa(names);
|
556
|
Nfa nfa2 = r2.MkNfa(names);
|
557
|
Nfa nfa0 = new Nfa(nfa1.Start, nfa2.Exit);
|
558
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in nfa1.Trans)
|
559
|
nfa0.AddTrans(entry);
|
560
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in nfa2.Trans)
|
561
|
nfa0.AddTrans(entry);
|
562
|
nfa0.AddTrans(nfa1.Exit, null, nfa2.Start);
|
563
|
return nfa0;
|
564
|
}
|
565
|
}
|
566
|
|
567
|
class Alt : Regex {
|
568
|
Regex r1, r2;
|
569
|
|
570
|
public Alt(Regex r1, Regex r2) {
|
571
|
this.r1 = r1; this.r2 = r2;
|
572
|
}
|
573
|
|
574
|
// If nfa1 has form s1s ----> s1e
|
575
|
// and nfa2 has form s2s ----> s2e
|
576
|
// then nfa0 has form s0s -eps-> s1s ----> s1e -eps-> s0e
|
577
|
// s0s -eps-> s2s ----> s2e -eps-> s0e
|
578
|
|
579
|
public override Nfa MkNfa(Nfa.NameSource names) {
|
580
|
Nfa nfa1 = r1.MkNfa(names);
|
581
|
Nfa nfa2 = r2.MkNfa(names);
|
582
|
int s0s = names.next();
|
583
|
int s0e = names.next();
|
584
|
Nfa nfa0 = new Nfa(s0s, s0e);
|
585
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in nfa1.Trans)
|
586
|
nfa0.AddTrans(entry);
|
587
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in nfa2.Trans)
|
588
|
nfa0.AddTrans(entry);
|
589
|
nfa0.AddTrans(s0s, null, nfa1.Start);
|
590
|
nfa0.AddTrans(s0s, null, nfa2.Start);
|
591
|
nfa0.AddTrans(nfa1.Exit, null, s0e);
|
592
|
nfa0.AddTrans(nfa2.Exit, null, s0e);
|
593
|
return nfa0;
|
594
|
}
|
595
|
}
|
596
|
|
597
|
class Star : Regex {
|
598
|
Regex r;
|
599
|
|
600
|
public Star(Regex r) {
|
601
|
this.r = r;
|
602
|
}
|
603
|
|
604
|
// If nfa1 has form s1s ----> s1e
|
605
|
// then nfa0 has form s0s ----> s0s
|
606
|
// s0s -eps-> s1s
|
607
|
// s1e -eps-> s0s
|
608
|
|
609
|
public override Nfa MkNfa(Nfa.NameSource names) {
|
610
|
Nfa nfa1 = r.MkNfa(names);
|
611
|
int s0s = names.next();
|
612
|
Nfa nfa0 = new Nfa(s0s, s0s);
|
613
|
foreach (KeyValuePair<int, ArrayList<Transition>> entry in nfa1.Trans)
|
614
|
nfa0.AddTrans(entry);
|
615
|
nfa0.AddTrans(s0s, null, nfa1.Start);
|
616
|
nfa0.AddTrans(nfa1.Exit, null, s0s);
|
617
|
return nfa0;
|
618
|
}
|
619
|
}
|
620
|
|
621
|
// Trying the RE->NFA->DFA translation on three regular expressions
|
622
|
|
623
|
class TestNFA {
|
624
|
public static void Main(String[] args) {
|
625
|
Regex a = new Sym("A");
|
626
|
Regex b = new Sym("B");
|
627
|
Regex c = new Sym("C");
|
628
|
Regex abStar = new Star(new Alt(a, b));
|
629
|
Regex bb = new Seq(b, b);
|
630
|
Regex r = new Seq(abStar, new Seq(a, b));
|
631
|
// The regular expression (a|b)*ab
|
632
|
BuildAndShow("ex1", r);
|
633
|
// The regular expression ((a|b)*ab)*
|
634
|
BuildAndShow("ex2", new Star(r));
|
635
|
// The regular expression ((a|b)*ab)((a|b)*ab)
|
636
|
BuildAndShow("ex3", new Seq(r, r));
|
637
|
// The regular expression (a|b)*abb, from ASU 1986 p 136
|
638
|
BuildAndShow("ex4", new Seq(abStar, new Seq(a, bb)));
|
639
|
// SML reals: sign?((digit+(\.digit+)?))([eE]sign?digit+)?
|
640
|
Regex d = new Sym("digit");
|
641
|
Regex dPlus = new Seq(d, new Star(d));
|
642
|
Regex s = new Sym("sign");
|
643
|
Regex sOpt = new Alt(s, new Eps());
|
644
|
Regex dot = new Sym(".");
|
645
|
Regex dotDigOpt = new Alt(new Eps(), new Seq(dot, dPlus));
|
646
|
Regex mant = new Seq(sOpt, new Seq(dPlus, dotDigOpt));
|
647
|
Regex e = new Sym("e");
|
648
|
Regex exp = new Alt(new Eps(), new Seq(e, new Seq(sOpt, dPlus)));
|
649
|
Regex smlReal = new Seq(mant, exp);
|
650
|
BuildAndShow("ex5", smlReal);
|
651
|
}
|
652
|
|
653
|
public static void BuildAndShow(String fileprefix, Regex r) {
|
654
|
Nfa nfa = r.MkNfa(new Nfa.NameSource());
|
655
|
Console.WriteLine(nfa);
|
656
|
Console.WriteLine("Writing NFA graph to file");
|
657
|
nfa.WriteDot(fileprefix + "nfa.dot");
|
658
|
Console.WriteLine("---");
|
659
|
Dfa dfa = nfa.ToDfa();
|
660
|
Console.WriteLine(dfa);
|
661
|
Console.WriteLine("Writing DFA graph to file");
|
662
|
dfa.WriteDot(fileprefix + "dfa.dot");
|
663
|
Console.WriteLine();
|
664
|
}
|
665
|
}
|
666
|
}
|