diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b093843a3..6b0a52ad22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- Fixed infinite loop with `cells.split_graphemes` + ## [14.3.2] - 2026-02-01 ### Fixed diff --git a/rich/cells.py b/rich/cells.py index 31165957b9..9d590b04e8 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int: def split_graphemes( text: str, unicode_version: str = "auto" ) -> "tuple[list[CellSpan], int]": - """Divide text into spans that define a single grapheme. + """Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string. + + The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero. + This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size. Args: text: String to split. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version. Returns: - List of spans. + A tuple of a list of *spans* and the cell length of the entire string. A span is a tuple + of three values consisting of (START, END, CELL LENGTH), where START and END are string indices, + and CELL LENGTH is the cell length of the single grapheme. """ cell_table = load_cell_table(unicode_version) @@ -181,32 +186,48 @@ def split_graphemes( SPECIAL = {"\u200d", "\ufe0f"} while index < codepoint_count: if (character := text[index]) in SPECIAL: + if not spans: + # ZWJ or variation selector at the beginning of the string doesn't really make sense. + # But handle it, we must. 
+ spans.append((index, index := index + 1, 0)) + continue if character == "\u200d": # zero width joiner - index += 2 - if spans: - start, _end, cell_length = spans[-1] - spans[-1] = (start, index, cell_length) - elif last_measured_character: + # The condition handles the case where a ZWJ is at the end of the string, and has nothing to join + index += 2 if index < (codepoint_count - 1) else 1 + start, _end, cell_length = spans[-1] + spans[-1] = (start, index, cell_length) + else: # variation selector 16 index += 1 - if spans: + if last_measured_character: start, _end, cell_length = spans[-1] if last_measured_character in cell_table.narrow_to_wide: last_measured_character = None cell_length += 1 total_width += 1 spans[-1] = (start, index, cell_length) + else: + # No previous character to change the size of. + # Shouldn't occur in practice. + # But handle it, we must. + start, _end, cell_length = spans[-1] + spans[-1] = (start, index, cell_length) continue if character_width := get_character_cell_size(character, unicode_version): last_measured_character = character spans.append((index, index := index + 1, character_width)) total_width += character_width - elif spans: - # zero width characters are associated with the previous character - start, _end, cell_length = spans[-1] - spans[-1] = (start, index := index + 1, cell_length) + else: + # Character has zero width + if spans: + # zero width characters are associated with the previous character + start, _end, cell_length = spans[-1] + spans[-1] = (start, index := index + 1, cell_length) + else: + # A zero width character with no prior spans + spans.append((index, index := index + 1, 0)) return (spans, total_width) diff --git a/tests/test_cells.py b/tests/test_cells.py index f101740a01..39ba346e7e 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -134,6 +134,21 @@ def test_chop_cells_mixed_width(): assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"] +@pytest.mark.parametrize( + 
"text,expected", + [ + ("", []), + ("\x1b", []), + ("\x1b\x1b", []), + ("\x1b\x1b\x1b", []), + ("\x1b\x1b\x1b\x1b", []), + ], +) +def test_chop_cells_zero_width(text: str, expected: list[str]) -> None: + """Test zero width characters being chopped.""" + assert chop_cells(text, 3) == expected + + def test_is_single_cell_widths() -> None: # Check _is_single_cell_widths reports correctly for character in string.printable: @@ -172,12 +187,35 @@ def test_is_single_cell_widths() -> None: ("♻", [(0, 1, 1)], 1), ("♻️", [(0, 2, 2)], 2), ("♻♻️", [(0, 1, 1), (1, 3, 2)], 3), + ("\x1b", [(0, 1, 0)], 0), # One escape should have zero width + ("\x1b\x1b", [(0, 2, 0)], 0), # Two escapes should have zero width + ( + "\ufe0f", + [(0, 1, 0)], + 0, + ), # Variation selector 16, without anything to change should have zero width + ( + "\ufe0f\ufe0f", + [(0, 2, 0)], + 0, + ), # 2 X variation selector 16, without anything to change should have zero width + ( + "\u200d", + [(0, 1, 0)], + 0, + ), # A zero width joiner with nothing prior should have zero width + ( + "\u200d\u200d", + [(0, 2, 0)], + 0, + ), # Two ZWJs should have zero width ], ) def test_split_graphemes( text: str, expected_spans: list[CellSpan], expected_cell_length: int ): spans, cell_length = split_graphemes(text) + print(spans) assert cell_len(text) == expected_cell_length assert spans == expected_spans assert cell_length == expected_cell_length