Skip to content

Commit 7dd5fd8

Browse files
Piotr Piastucki authored and kaiw committed
Myers matcher performance improvements and cleanup
This patch slightly simplifies the Myers matcher code and improves its performance by up to 10%.
1 parent aff98a3 commit 7dd5fd8

1 file changed

Lines changed: 27 additions & 35 deletions

File tree

meld/matchers.py

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,8 @@ def preprocess(self):
105105

106106
# discard lines that do not match any line from the other file
107107
if n > 0 and m > 0:
108-
aset = set()
109-
bset = set()
110-
for newline in b:
111-
bset.add(newline)
112-
for newline in a:
113-
aset.add(newline)
108+
aset = frozenset(a)
109+
bset = frozenset(b)
114110
a2 = []
115111
b2 = []
116112
j = 0
@@ -204,7 +200,8 @@ def build_matching_blocks(self, lastsnake, snakes):
204200

205201
def initialise(self):
206202
"""
207-
Optimized implementaion of the O(NP) algorithm described by Sun Wu, Udi Manber, Gene Myers, Webb Miller
203+
Optimized implementation of the O(NP) algorithm described by Sun Wu,
204+
Udi Manber, Gene Myers, Webb Miller
208205
("An O(NP) Sequence Comparison Algorithm", 1989)
209206
http://research.janelia.org/myers/Papers/np_diff.pdf
210207
"""
@@ -214,10 +211,9 @@ def initialise(self):
214211
n = len(b)
215212
middle = m + 1
216213
lastsnake = None
217-
delta = n - m
218-
dmin = min(0, delta)
219-
dmax = max(0, delta)
220-
214+
delta = n - m + middle
215+
dmin = min(middle, delta)
216+
dmax = max(middle, delta)
221217
snakes = []
222218
if n > 0 and m > 0:
223219
size = n + m + 2
@@ -230,57 +226,53 @@ def initialise(self):
230226
# move along vertical edge
231227
yv = -1
232228
node = None
233-
for k in range(dmin - p, delta, 1):
234-
km = k + middle
235-
if yv < fp[km + 1][0]:
236-
yv, node = fp[km + 1]
229+
for km in range(dmin - p, delta, 1):
230+
t = fp[km + 1]
231+
if yv < t[0]:
232+
yv, node = t
237233
else:
238234
yv += 1
239-
x = yv - k
240-
snake = 0
235+
snake = x = yv - km + middle
241236
while x < m and yv < n and a[x] == b[yv]:
242237
x += 1
243238
yv += 1
244-
snake += 1
245-
if snake:
239+
if x != snake:
240+
snake = x - snake
246241
snakes.append((node, x - snake, yv - snake, snake))
247242
node = len(snakes) - 1
248243
fp[km] = (yv, node)
249244
# move along horizontal edge
250245
yh = -1
251246
node = None
252-
for k in range(dmax + p, delta, -1):
253-
km = k + middle
254-
if fp[km - 1][0] >= yh:
255-
yh, node = fp[km - 1]
247+
for km in range(dmax + p, delta, -1):
248+
t = fp[km - 1]
249+
if yh <= t[0]:
250+
yh, node = t
256251
yh += 1
257-
x = yh - k
258-
snake = 0
252+
snake = x = yh - km + middle
259253
while x < m and yh < n and a[x] == b[yh]:
260254
x += 1
261255
yh += 1
262-
snake += 1
263-
if snake:
256+
if x != snake:
257+
snake = x - snake
264258
snakes.append((node, x - snake, yh - snake, snake))
265259
node = len(snakes) - 1
266260
fp[km] = (yh, node)
267261
# point on the diagonal that leads to the sink
268-
km = delta + middle
269262
if yv < yh:
270-
y, node = fp[km + 1]
263+
y, node = fp[delta + 1]
271264
else:
272-
y, node = fp[km - 1]
265+
y, node = fp[delta - 1]
273266
y += 1
274-
x = y - delta
275-
snake = 0
267+
snake = x = y - delta + middle
276268
while x < m and y < n and a[x] == b[y]:
277269
x += 1
278270
y += 1
279-
snake += 1
280-
if snake:
271+
if x != snake:
272+
snake = x - snake
281273
snakes.append((node, x - snake, y - snake, snake))
282274
node = len(snakes) - 1
283-
fp[km] = (y, node)
275+
fp[delta] = (y, node)
284276
if y >= n:
285277
lastsnake = node
286278
break

0 commit comments

Comments
 (0)