From 60b064a70a9db813e96955b7b344f61f62817f4c Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 14:02:23 +0000 Subject: [PATCH 1/7] fix for infinite loop in split_graphemes --- CHANGELOG.md | 6 ++++++ rich/cells.py | 2 ++ tests/test_cells.py | 2 ++ 3 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b093843a3..6b0a52ad22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- Fixed infinite loop with `cells.split_graphemes` + ## [14.3.2] - 2026-02-01 ### Fixed diff --git a/rich/cells.py b/rich/cells.py index 31165957b9..a35065a782 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -207,6 +207,8 @@ def split_graphemes( # zero width characters are associated with the previous character start, _end, cell_length = spans[-1] spans[-1] = (start, index := index + 1, cell_length) + else: + index = index + 1 return (spans, total_width) diff --git a/tests/test_cells.py b/tests/test_cells.py index f101740a01..ce6fb99e76 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -172,6 +172,8 @@ def test_is_single_cell_widths() -> None: ("♻", [(0, 1, 1)], 1), ("♻️", [(0, 2, 2)], 2), ("♻♻️", [(0, 1, 1), (1, 3, 2)], 3), + ("\x1b", [], 0), + ("\x1b\x1b", [], 0), ], ) def test_split_graphemes( From c4db67aa039ac26041e702aa4db053ea56ac9a7f Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 15:49:21 +0000 Subject: [PATCH 2/7] test cases --- rich/cells.py | 20 ++++++++++++++------ tests/test_cells.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index a35065a782..bf8eaf5e1a 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -181,9 +181,14 @@ def split_graphemes( 
SPECIAL = {"\u200d", "\ufe0f"} while index < codepoint_count: if (character := text[index]) in SPECIAL: + if not spans: + spans.append((index, index := index + 1, 0)) + continue if character == "\u200d": # zero width joiner - index += 2 + index += 1 + if index != codepoint_count: + index += 1 if spans: start, _end, cell_length = spans[-1] spans[-1] = (start, index, cell_length) @@ -203,12 +208,15 @@ def split_graphemes( last_measured_character = character spans.append((index, index := index + 1, character_width)) total_width += character_width - elif spans: - # zero width characters are associated with the previous character - start, _end, cell_length = spans[-1] - spans[-1] = (start, index := index + 1, cell_length) else: - index = index + 1 + # Character has zero width + if spans: + # zero width characters are associated with the previous character + start, _end, cell_length = spans[-1] + spans[-1] = (start, index := index + 1, cell_length) + else: + # A zero width character with no prior spans + spans.append((index, index := index + 1, 0)) return (spans, total_width) diff --git a/tests/test_cells.py b/tests/test_cells.py index ce6fb99e76..609d035fec 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -134,6 +134,18 @@ def test_chop_cells_mixed_width(): assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"] +@pytest.mark.parametrize( + "text,expected", + [ + ("", []), + ("\x1b", []), + ("\x1b\x1b", []), + ], +) +def test_chop_cells_zero_width(text: str, expected: list) -> None: + assert chop_cells(text, 3) == expected + + def test_is_single_cell_widths() -> None: # Check _is_single_cell_widths reports correctly for character in string.printable: @@ -172,14 +184,30 @@ def test_is_single_cell_widths() -> None: ("♻", [(0, 1, 1)], 1), ("♻️", [(0, 2, 2)], 2), ("♻♻️", [(0, 1, 1), (1, 3, 2)], 3), - ("\x1b", [], 0), - ("\x1b\x1b", [], 0), + ("\x1b", [(0, 1, 0)], 0), # One escape sahould have zero width + ("\x1b\x1b", [(0, 2, 0)], 0), # Two 
escapes should have zero width + ( + "\ufe0f", + [(0, 1, 0)], + 0, + ), # Variation selector 16, without anything to change should have zero width + ( + "\u200d", + [(0, 1, 0)], + 0, + ), # A zero width joiner with nothing prior should have zero width + ( + "\u200d\u200d", + [(0, 2, 0)], + 0, + ), # Two ZWJs should have zero width ], ) def test_split_graphemes( text: str, expected_spans: list[CellSpan], expected_cell_length: int ): spans, cell_length = split_graphemes(text) + print(spans) assert cell_len(text) == expected_cell_length assert spans == expected_spans assert cell_length == expected_cell_length From 646aff8e44739869a1eff41c8e56b9fff3f38ef1 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 15:53:14 +0000 Subject: [PATCH 3/7] more defensive condition --- rich/cells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rich/cells.py b/rich/cells.py index bf8eaf5e1a..a25fd51012 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -187,7 +187,7 @@ def split_graphemes( if character == "\u200d": # zero width joiner index += 1 - if index != codepoint_count: + if index < codepoint_count: index += 1 if spans: start, _end, cell_length = spans[-1] From 87e7ca27d0f8047e2d22c6ee9f7cf2c59e4c96af Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 16:44:19 +0000 Subject: [PATCH 4/7] refinements, and tests --- rich/cells.py | 31 +++++++++++++++++++++---------- tests/test_cells.py | 8 ++++++++ 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index a25fd51012..fe84f1a824 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -161,14 +161,19 @@ def _cell_len(text: str, unicode_version: str) -> int: def split_graphemes( text: str, unicode_version: str = "auto" ) -> "tuple[list[CellSpan], int]": - """Divide text into spans that define a single grapheme. + """Divide text into spans that define a single grapheme, and additonally return the cell length of the whole string. 
+ + The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero. + This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size. Args: text: String to split. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version. Returns: - List of spans. + A tuple of a list of *spans* and the cell length of the entire string. A span is a tuple + of three values consisting of (<START>, <END>, <CELL LENGTH>), where START and END are string indices, + and CELL LENGTH is the cell length of the single grapheme. """ cell_table = load_cell_table(unicode_version) @@ -182,26 +187,32 @@ def split_graphemes( while index < codepoint_count: if (character := text[index]) in SPECIAL: if not spans: + # ZWJ or variation selector at the beginning of the string doesn't really make sense. + # But handle it, we must. spans.append((index, index := index + 1, 0)) continue if character == "\u200d": # zero width joiner - index += 1 - if index < codepoint_count: - index += 1 - if spans: - start, _end, cell_length = spans[-1] - spans[-1] = (start, index, cell_length) - elif last_measured_character: + # The condition handles the case where a ZWJ is at the end of the string, and has nothing to join + index += 2 if index < (codepoint_count - 1) else 1 + start, _end, cell_length = spans[-1] + spans[-1] = (start, index, cell_length) + else: # variation selector 16 index += 1 - if spans: + if last_measured_character: start, _end, cell_length = spans[-1] if last_measured_character in cell_table.narrow_to_wide: last_measured_character = None cell_length += 1 total_width += 1 spans[-1] = (start, index, cell_length) + else: + # No previous character to change the size of. + # Shouldn't occur in practice. + # But handle it, we must. 
+ start, _end, cell_length = spans[-1] + spans[-1] = (start, index, cell_length) continue if character_width := get_character_cell_size(character, unicode_version): diff --git a/tests/test_cells.py b/tests/test_cells.py index 609d035fec..5aefda524c 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -140,9 +140,12 @@ def test_chop_cells_mixed_width(): ("", []), ("\x1b", []), ("\x1b\x1b", []), + ("\x1b\x1b\x1b", []), + ("\x1b\x1b\x1b\x1b", []), ], ) def test_chop_cells_zero_width(text: str, expected: list) -> None: + """Test zer width characters being chopped.""" assert chop_cells(text, 3) == expected @@ -191,6 +194,11 @@ def test_is_single_cell_widths() -> None: [(0, 1, 0)], 0, ), # Variation selector 16, without anything to change should have zero width + ( + "\ufe0f\ufe0f", + [(0, 2, 0)], + 0, + ), # 2 X variation selector 16, without anything to change should have zero width ( "\u200d", [(0, 1, 0)], From b618ccc7e310a92e555d6ec38a7f390b37192ef3 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 16:51:51 +0000 Subject: [PATCH 5/7] spelling --- rich/cells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rich/cells.py b/rich/cells.py index fe84f1a824..9d590b04e8 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -161,7 +161,7 @@ def _cell_len(text: str, unicode_version: str) -> int: def split_graphemes( text: str, unicode_version: str = "auto" ) -> "tuple[list[CellSpan], int]": - """Divide text into spans that define a single grapheme, and additonally return the cell length of the whole string. + """Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string. The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero. This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size. 
From b031dca1d74f1f82a02d71e771b116ec4f9ad6c8 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 16:55:55 +0000 Subject: [PATCH 6/7] Update tests/test_cells.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_cells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cells.py b/tests/test_cells.py index 5aefda524c..e7e04fa8d5 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -187,7 +187,7 @@ def test_is_single_cell_widths() -> None: ("♻", [(0, 1, 1)], 1), ("♻️", [(0, 2, 2)], 2), ("♻♻️", [(0, 1, 1), (1, 3, 2)], 3), - ("\x1b", [(0, 1, 0)], 0), # One escape sahould have zero width + ("\x1b", [(0, 1, 0)], 0), # One escape should have zero width ("\x1b\x1b", [(0, 2, 0)], 0), # Two escapes should have zero width ( "\ufe0f", From 905b397deb6b5ed59dfd80fa499c90e215f309c8 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 19 Feb 2026 16:56:04 +0000 Subject: [PATCH 7/7] Update tests/test_cells.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_cells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cells.py b/tests/test_cells.py index e7e04fa8d5..39ba346e7e 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -144,7 +144,7 @@ def test_chop_cells_mixed_width(): ("\x1b\x1b\x1b\x1b", []), ], ) -def test_chop_cells_zero_width(text: str, expected: list) -> None: +def test_chop_cells_zero_width(text: str, expected: list[str]) -> None: """Test zer width characters being chopped.""" assert chop_cells(text, 3) == expected