Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Fixed

- Fixed infinite loop with `cells.split_graphemes`

## [14.3.2] - 2026-02-01

### Fixed
Expand Down
45 changes: 33 additions & 12 deletions rich/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int:
def split_graphemes(
text: str, unicode_version: str = "auto"
) -> "tuple[list[CellSpan], int]":
"""Divide text into spans that define a single grapheme.
"""Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string.

The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.

Args:
text: String to split.
unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

Returns:
List of spans.
A tuple of a list of *spans* and the cell length of the entire string. Each span is a tuple
of three values, (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
and CELL LENGTH is the cell length of the single grapheme.
"""

cell_table = load_cell_table(unicode_version)
Expand All @@ -181,32 +186,48 @@ def split_graphemes(
SPECIAL = {"\u200d", "\ufe0f"}
while index < codepoint_count:
if (character := text[index]) in SPECIAL:
if not spans:
# ZWJ or variation selector at the beginning of the string doesn't really make sense.
# But handle it, we must.
spans.append((index, index := index + 1, 0))
continue
if character == "\u200d":
# zero width joiner
index += 2
if spans:
start, _end, cell_length = spans[-1]
spans[-1] = (start, index, cell_length)
elif last_measured_character:
# The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
index += 2 if index < (codepoint_count - 1) else 1
start, _end, cell_length = spans[-1]
spans[-1] = (start, index, cell_length)
else:
# variation selector 16
index += 1
if spans:
if last_measured_character:
start, _end, cell_length = spans[-1]
if last_measured_character in cell_table.narrow_to_wide:
last_measured_character = None
cell_length += 1
total_width += 1
spans[-1] = (start, index, cell_length)
else:
# No previous character to change the size of.
# Shouldn't occur in practice.
# But handle it, we must.
start, _end, cell_length = spans[-1]
spans[-1] = (start, index, cell_length)
continue

if character_width := get_character_cell_size(character, unicode_version):
last_measured_character = character
spans.append((index, index := index + 1, character_width))
total_width += character_width
elif spans:
# zero width characters are associated with the previous character
start, _end, cell_length = spans[-1]
spans[-1] = (start, index := index + 1, cell_length)
else:
# Character has zero width
if spans:
# zero width characters are associated with the previous character
start, _end, cell_length = spans[-1]
spans[-1] = (start, index := index + 1, cell_length)
else:
# A zero width character with no prior spans
spans.append((index, index := index + 1, 0))

return (spans, total_width)

Expand Down
38 changes: 38 additions & 0 deletions tests/test_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ def test_chop_cells_mixed_width():
assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]


# Every case is either the empty string or a run of escape characters;
# each is expected to chop to an empty list.
@pytest.mark.parametrize(
    "text,expected",
    [
        ("", []),
        ("\x1b", []),
        ("\x1b\x1b", []),
        ("\x1b\x1b\x1b", []),
        ("\x1b\x1b\x1b\x1b", []),
    ],
)
def test_chop_cells_zero_width(text: str, expected: list[str]) -> None:
    """Test zero width characters being chopped."""
    assert chop_cells(text, 3) == expected


def test_is_single_cell_widths() -> None:
# Check _is_single_cell_widths reports correctly
for character in string.printable:
Expand Down Expand Up @@ -172,12 +187,35 @@ def test_is_single_cell_widths() -> None:
("♻", [(0, 1, 1)], 1),
("♻️", [(0, 2, 2)], 2),
("♻♻️", [(0, 1, 1), (1, 3, 2)], 3),
("\x1b", [(0, 1, 0)], 0), # One escape should have zero width
("\x1b\x1b", [(0, 2, 0)], 0), # Two escapes should have zero width
(
"\ufe0f",
Comment thread
willmcgugan marked this conversation as resolved.
[(0, 1, 0)],
0,
), # Variation selector 16, without anything to change should have zero width
(
"\ufe0f\ufe0f",
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in comment: "within noting prior" → "with nothing prior".

Copilot uses AI. Check for mistakes.
[(0, 2, 0)],
0,
), # 2 X variation selector 16, without anything to change should have zero width
(
"\u200d",
[(0, 1, 0)],
0,
), # A zero width joiner with nothing prior should have zero width
(
"\u200d\u200d",
[(0, 2, 0)],
0,
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the print(spans) debug output from this test; it will add noise to test runs/CI output and isn't needed for assertions.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The print is removed by pytest if the test passes.

), # Two ZWJs should have zero width
],
)
def test_split_graphemes(
text: str, expected_spans: list[CellSpan], expected_cell_length: int
):
spans, cell_length = split_graphemes(text)
print(spans)
assert cell_len(text) == expected_cell_length
assert spans == expected_spans
assert cell_length == expected_cell_length
Expand Down
Loading