Skip to content

Commit 1be7633

Browse files
committed
meldbuffer: Move BufferLines to just use GTK+ line logic (#240)
Over the years we've had many, many issues with GTK+ and Python disagreeing about what constitutes a line break. The actual bug that has prompted this rewrite is just another one in the long series of wild issues, but it's *so annoying* that rather than work through the insane logic I wrote before, I've just changed the whole approach and made our treat-my-textbuffer-as-a-list-of-lines shim use GTK+ APIs to do its line calculation logic. I was worried that this would be slow - it is after all a _lot_ more function calls and we're crossing through GObject introspection a bunch, but it... seems okay? One of the minor weird side-effects here is that this accessor absolutely enforces that regex filters only apply to a single line. I considered options for maintaining our very-broken status quo, but they were very, very difficult by comparison, and any multi-line filters are completely unsupported.
1 parent 4bf1d1b commit 1be7633

2 files changed

Lines changed: 60 additions & 45 deletions

File tree

meld/meldbuffer.py

Lines changed: 11 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,6 @@ class BufferLines:
224224
This class allows a Gtk.TextBuffer to be treated as a list of lines of
225225
possibly-filtered text. If no filter is given, the raw output from the
226226
Gtk.TextBuffer is used.
227-
228-
The logic here (and in places in FileDiff) requires that Python's
229-
unicode splitlines() implementation and Gtk.TextBuffer agree on where
230-
linebreaks occur. Happily, this is usually the case.
231227
"""
232228

233229
def __init__(self, buf, textfilter=None):
@@ -240,48 +236,18 @@ def __init__(self, buf, textfilter=None):
240236
def __getitem__(self, key):
241237
if isinstance(key, slice):
242238
lo, hi, _ = key.indices(self.buf.get_line_count())
243-
244-
# FIXME: If we ask for arbitrary slices past the end of the buffer,
245-
# this will return the last line.
246-
start = self.buf.get_iter_at_line_or_eof(lo)
239+
line_start = self.buf.get_iter_at_line_or_eof(lo)
247240
end = self.buf.get_iter_at_line_or_eof(hi)
248-
txt = self.buf.get_text(start, end, False)
249-
250-
filter_txt = self.textfilter(txt, self.buf, start, end)
251-
lines = filter_txt.splitlines()
252-
ends = filter_txt.splitlines(True)
253-
254-
# The last line in a Gtk.TextBuffer is guaranteed never to end in a
255-
# newline. As splitlines() discards an empty line at the end, we
256-
# need to artificially add a line if the requested slice is past
257-
# the end of the buffer, and the last line in the slice ended in a
258-
# newline.
259-
if hi >= self.buf.get_line_count() and \
260-
lo < self.buf.get_line_count() and \
261-
(len(lines) == 0 or len(lines[-1]) != len(ends[-1])):
262-
lines.append("")
263-
ends.append("")
264-
265-
hi = self.buf.get_line_count() if hi == sys.maxsize else hi
266-
if hi - lo != len(lines):
267-
# These codepoints are considered line breaks by Python, but
268-
# not by GtkTextStore.
269-
additional_breaks = set(('\x0c', '\x85', '\u2028'))
270-
i = 0
271-
while i < len(ends):
272-
line, end = lines[i], ends[i]
273-
# It's possible that the last line in a file would end in a
274-
# line break character, which requires no joining.
275-
if end and end[-1] in additional_breaks and \
276-
(not line or line[-1] not in additional_breaks):
277-
assert len(ends) >= i + 1
278-
lines[i:i + 2] = [line + end[-1] + lines[i + 1]]
279-
ends[i:i + 2] = [end + ends[i + 1]]
280-
else:
281-
# We only increment if we don't correct a line, to
282-
# handle the case of a single line having multiple
283-
# additional_breaks characters that need correcting.
284-
i += 1
241+
242+
lines = []
243+
while line_start.compare(end) < 0:
244+
line_end = line_start.copy()
245+
if not line_end.ends_line():
246+
line_end.forward_to_line_end()
247+
txt = self.buf.get_text(line_start, line_end, False)
248+
filter_txt = self.textfilter(txt, self.buf, line_start, end)
249+
lines.append(filter_txt)
250+
line_start.forward_visible_line()
285251

286252
return lines
287253

test/test_buffer_lines.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
2+
from unittest import mock
3+
4+
import pytest
5+
6+
from meld.meldbuffer import BufferLines, MeldBuffer
7+
8+
9+
text = ("""0
10+
1
11+
2
12+
3
13+
4
14+
5
15+
6
16+
7
17+
8
18+
9
19+
10
20+
""")
21+
22+
23+
@pytest.mark.parametrize("line_start, line_end, expected_text", [
24+
(0, 1, ["0"],),
25+
(0, 2, ["0", "1"],),
26+
# zero-sized slice
27+
(9, 9, [],),
28+
(9, 10, ["9"],),
29+
(9, 11, ["9", "10"],),
30+
# Past the end of the buffer
31+
(9, 12, ["9", "10"],),
32+
# Waaaay past the end of the buffer
33+
(9, 9999, ["9", "10"],),
34+
# And sidling towards past-the-end start indices
35+
(10, 12, ["10"],),
36+
(11, 12, [],),
37+
])
38+
def test_filter_text(line_start, line_end, expected_text):
39+
40+
import meld.meldbuffer
41+
42+
meld.meldbuffer.bind_settings = mock.MagicMock()
43+
meld.meldbuffer.meldsettings = mock.MagicMock(style_scheme=None)
44+
45+
buf = MeldBuffer()
46+
buf.set_text(text)
47+
48+
buffer_lines = BufferLines(buf)
49+
assert buffer_lines[line_start:line_end] == expected_text

0 commit comments

Comments
 (0)