-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnfa.py
More file actions
287 lines (230 loc) · 9.92 KB
/
nfa.py
File metadata and controls
287 lines (230 loc) · 9.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import pdb
class NFAState(object):
def __init__(self, letter, finish=False):
self.letter = letter
self.finish = finish
self.nexts = dict()
def addNextState(self, state, letter=None):
if not letter:
letter = state.letter
nextStates = self.nexts.get(letter)
if nextStates:
# state already contains a transition for this letter
nextStates.append(state)
else:
self.nexts[letter] = [state]
def getNextStates(self):
result = []
for k in self.nexts.keys():
result.extend(self.nexts[k])
return set(result)
def __str__(self):
result = "letter: " + str(self.letter) + "\nfinish: " + str(self.finish) + "\nnexts: \n" + str(self.nexts)
for k in self.nexts.keys():
result += "\n" + str(k) + ":\n"
return result
class NFAFrag():
def __init__(self, enterState):
self.enterStates = [enterState]
self.exitStates = self.enterStates
# creates a transition from self's exitStates to all states in stateList
# updates self's exitStates to stateList
def appendToExitStates(self, stateList, letter=None):
self._appendStates(self.exitStates, stateList, letter)
# creates transition from self's enter states to stateList
def appendToEnterStates(self, stateList, letter=None):
self._appendStates(self.enterStates, stateList, letter)
# creates a transition from fromStates to toStates, if letter is specified, then it's used as
# the transition character for all states
def _appendStates(self, fromStates, toStates, letter=None):
for fs in fromStates:
for ts in toStates:
fs.addNextState(ts, letter)
# appends other to the exitStates of self, updating self's exitStates to be other's exitStates
def appendFragment(self, other):
self.appendToExitStates(other.enterStates)
self.exitStates = other.exitStates
return self
# if self doesn't start with NC transition, then prepend an NC transition enterState to self
# merge other's exitStates with self's and append other to self's enterStates
def addSplitBranch(self, other):
if not self.isInitialNoChar():
ncStartState = [NFAState(NO_CHAR)]
self._appendStates(ncStartState, self.enterStates)
self.enterStates = ncStartState
# merge exit states
self.exitStates.extend(other.exitStates)
self.appendToEnterStates(other.enterStates)
def isInitialNoChar(self):
return self.startsWithSingleChar(NO_CHAR)
def isSplit(self):
return self.startsWithSingleChar(ALTERNATE) and self.exitStates == self.enterStates
def isGroupStart(self):
return self.startsWithSingleChar(GROUP_START)
def startsWithSingleChar(self, c):
return len(self.enterStates) == 1 and self.enterStates[0].letter == c
def __str__(self):
result = ""
result += "Enter States: \n" + str(list(map(str, self.enterStates)))
result += "\nExit States: \n" + str(list(map(str, self.exitStates)))
REGEX_OPS = {'*' ,
'+' ,
'?' ,
'.' ,
'|' ,
'(' ,
')' }
#
# represents a state that doesn't require an input to advance to it
NO_CHAR = chr(257)
# transition on any char '.'
ANY_CHAR = chr(258)
#
# alternate between two patterns '|'
ALTERNATE = chr(259)
# '('
GROUP_START = chr(260)
#
START_STATE = chr(261)
class NFA(object):
""" represents a container object for a Nondeterministic Finite Automaton """
def __init__(self, pattern):
self.pattern = pattern
self.currentStates = set()
self.startState = self._parsePattern(pattern)
self.reset()
# parses the regex pattern into an NFA and returns the first state of the NFA
def _parsePattern(self, pattern):
fragStack = []
for c in pattern:
if c not in REGEX_OPS:
newFrag = NFAFrag(NFAState(c))
fragStack.append(newFrag)
else :
if c == '+' or c == '*':
# add NO_CHAR transition from topFrag's exitStates to enterStates
newFrag = NFAFrag(NFAState(NO_CHAR))
topFrag = fragStack.pop()
newFrag.appendFragment(topFrag)
newFrag.appendToExitStates(newFrag.enterStates, NO_CHAR)
# repeatedState.addNextState(repeatedState)
if c == '*':
# add a NO_CHAR transition to skip over entire fragment
newFrag.appendToEnterStates(newFrag.exitStates, NO_CHAR)
fragStack.append(newFrag)
elif c == '?':
zeroOneFrag = fragStack.pop()
# add a NO_CHAR state to begining of zero-oned frag for auto-skip
zeroOneFragInit = NFAFrag(NFAState(NO_CHAR))
zeroOneFragInit.appendFragment(zeroOneFrag)
zeroOneFrag = zeroOneFragInit
# link start states to end states via a NO_CHAR
zeroOneFrag.appendToEnterStates(zeroOneFrag.exitStates, NO_CHAR)
fragStack.append(zeroOneFrag)
elif c == '.':
# anycharState = NFAState(ANY_CHAR)
# stateStack.append(anycharState)
newFrag = NFAFrag(NFAState(ANY_CHAR))
fragStack.append(newFrag)
elif c == '|':
# on alternation, chain all preceding fragments together,
# push back on stack and push splitfrag on top
splitFrag = NFAFrag(NFAState(ALTERNATE))
altBranch = self._chainStackGroup(fragStack)
fragStack.append(altBranch)
# put state chain back onto the stack & repeat the above steps
fragStack.append(splitFrag)
elif c == '(':
# mark start of group on stack
groupStartFrag = NFAFrag(NFAState(GROUP_START))
fragStack.append(groupStartFrag)
elif c == ')':
# chain stack until the group start fragment is seen
group = self._chainStackGroup(fragStack)
fragStack.append(group)
nfaFrag = self._chainStackGroup(fragStack)
# pattern was a well-formed regular expression w/ matching parens
if not fragStack:
# set all end states to be finish states
for s in nfaFrag.exitStates:
s.finish = True
# add a non-character first state
firstState = NFAState(START_STATE)
firstFrag = NFAFrag(firstState)
firstFrag.appendFragment(nfaFrag)
return firstState
else:
# notify library user that an error occurred in parsing pattern
return None
# chains all fragments in fragStack into a single fragment until stack is empty or
# or a groupStart fragment has been seen
def _chainStackGroup(self, fragStack):
prevFrag = fragStack.pop()
while fragStack:
curFrag = fragStack.pop()
if curFrag.isGroupStart():
return prevFrag
if curFrag.isSplit():
# split branch is below the split frag operator
curFrag = fragStack.pop()
curFrag.addSplitBranch(prevFrag)
else:
curFrag.appendFragment(prevFrag)
prevFrag = curFrag
return prevFrag
# puts the NFA back to its start state
def reset(self):
self.currentStates = set([self.startState])
# advances to the next state(s) in the NFA
# returns true if advancement is successful otherwise false
def advanceStates(self, letter):
newStates = set()
self.currentStates = self._autoAdvanceNoChars(self.currentStates)
while self.currentStates:
curState = self.currentStates.pop()
if ANY_CHAR in curState.nexts:
# any char state transition
newStates.update(set(curState.nexts[ANY_CHAR]))
elif letter in curState.nexts:
newStates.update(set(curState.nexts[letter]))
newStates = self._autoAdvanceNoChars(newStates)
if newStates:
self.currentStates = newStates
return newStates != set()
# auto-advance all no-char transitions in stateSet
def _autoAdvanceNoChars(self, stateSet):
prevLen = 0
curLen = len(stateSet)
while prevLen != curLen:
addedStates = set()
prevLen = curLen
for s in stateSet:
if NO_CHAR in s.nexts:
addedStates.update(set(s.nexts[NO_CHAR]))
stateSet.update(addedStates)
curLen = len(stateSet)
return stateSet
# returns true if any of the current NFA states are finished states
def finished(self):
for n in self.currentStates:
if n.finish:
return True
return False
def __str__(self):
result = "NFA: \n" + str(self.startState)
seenStates = set()
seenStates.add(self.startState)
toExamine = []
toExamine.extend(self.startState.getNextStates())
prevLen = 0
while len(seenStates) != prevLen:
prevLen = len(seenStates)
newToExamine = []
for s in toExamine:
if s not in seenStates:
result += "\n" + str(s)
seenStates.add(s)
newToExamine.extend(s.getNextStates().difference(seenStates))
toExamine = newToExamine
result += "\n\n Current States: \n" + str(list(map(str, self.currentStates)))
return result