diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7430c8..7f27750 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,18 +90,8 @@ jobs: shell: bash run: | V=${{ matrix.python-version }} - - if [[ "$V" = pypy-* ]]; then - V=$(echo $V | tr -d .-) - IS_PYPY=1 - else - V=py$(echo $V | tr -d .) - IS_PYPY=0 - fi - - echo IS_PYPY=$IS_PYPY >>$GITHUB_ENV + V=py$(echo $V | tr -d .) echo TOX_PYTHON=$V >>$GITHUB_ENV - python -Im pip install tox - name: Prepare sdist and source-dir diff --git a/docs/api.rst b/docs/api.rst index 901b019..8e8f49b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -40,4 +40,8 @@ requirements.txt or equivalent. Their signatures will never change. .. autofunction:: wcwidth.HyperlinkParams +.. autofunction:: wcwidth.TextSizing + +.. autofunction:: wcwidth.TextSizingParams + .. _SEMVER: https://semver.org diff --git a/docs/intro.rst b/docs/intro.rst index 687409e..e760012 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -148,6 +148,12 @@ Use function `width()`_ to measure a string with improved handling of ``control_ >>> # or ignored, >>> wcwidth.width('hello\x1b[5Dworld', control_codes='ignore') 10 + >>> # Measure width of text using kitty text sizing protocol (OSC 66), + >>> width('\x1b]66;w=2;XY\x07') + 2 + >>> # Scaled text sizing: each grapheme occupies 'scale' cells + >>> width('\x1b]66;s=2;ABC\x07') + 6 Use ``control_codes='ignore'`` when the input is known not to contain any control characters or terminal sequences for slightly improved performance. Note that TAB (``'\t'``) is a control @@ -178,7 +184,6 @@ such as clearing the screen, vertical, or absolute cursor movement will raise `` ... 
ValueError: Cursor left movement at position 1 would move 5 cells left from column 1, exceeding string start - iter_sequences() ---------------- @@ -311,6 +316,10 @@ Use `clip()`_ to extract a substring by column positions, preserving terminal se >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 8, 15) '\x1b]8;;http://example.com\x07is link\x1b]8;;\x07' + >>> # and OSC 66 kitty text sizing, supporting width and scale, 'Look' -> '...ook' + >>> clip('\x1b]66;w=4:s=4;Look\x07', 1, 16, fillchar='.') + '...\x1b]66;s=4:w=3;ook\x07' + Use ``overtyping=False`` when the input is known not to contain any cursor movement characters (``\b``, ``\r``, ``CSI C``, ``CSI D``, ``CSI G``) for improved performance. When ``overtyping=None`` (default), a slower "Painter's algorithm" may be used after testing for the @@ -526,7 +535,8 @@ History 0.7.0 *2026-04-30* * **New** `clip()`_ parameter ``control_codes='parse'``, ``'ignore'``, and ``'strict'``. `clip()`_ - is now able to clip OSC 8 hyperlinks. + is now able to clip OSC 8 hyperlinks and OSC 66 text sizing sequences. + * **New** support for `kitty text sizing protocol`_ (OSC 66) in `width()`_ and `clip()`_. * **Improved** `clip()`_ and `width()`_ to support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or backspace (``\b``) now overwrites text. ``column_address`` (``hpa``) and carriage return (``\r``) are now parsed, and some values conditionally raise @@ -817,6 +827,8 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip .. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences .. _`propagate_sgr()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.propagate_sgr +.. _`TextSizing`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.TextSizing +.. _`TextSizingParams`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.TextSizingParams .. 
_`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences .. _`list_versions()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.list_versions .. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/ diff --git a/requirements-tests38.in b/requirements-tests38.in index ea2794e..cc51f94 100644 --- a/requirements-tests38.in +++ b/requirements-tests38.in @@ -1,5 +1,5 @@ # for python3.8 pytest<7 pytest-cov -coverage[toml]<6 +coverage[toml]<=7.6.1 packaging<26 diff --git a/requirements-tests38.txt b/requirements-tests38.txt index 2b8a269..1077443 100644 --- a/requirements-tests38.txt +++ b/requirements-tests38.txt @@ -6,7 +6,7 @@ # attrs==25.3.0 # via pytest -coverage==5.5 +coverage==7.6.1 # via # -r requirements-tests38.in # pytest-cov @@ -27,6 +27,6 @@ pytest==6.2.5 pytest-cov==5.0.0 # via -r requirements-tests38.in toml==0.10.2 - # via - # coverage - # pytest + # via pytest +tomli==2.4.1 + # via coverage diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index e6d3b44..3b79ece 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -317,6 +317,18 @@ def test_clip_osc8_hyperlinks(benchmark): benchmark(wcwidth.clip, text, 0, 80) +def test_width_osc66(benchmark): + """Benchmark width() with OSC 66 text sizing sequences.""" + text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07' + benchmark(wcwidth.width, text) + + +def test_clip_osc66(benchmark): + """Benchmark clip() with OSC 66 text sizing sequences.""" + text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07' + benchmark(wcwidth.clip, text, 3, 8) + + def test_clip_cursor_cr_overwrite(benchmark): """Benchmark clip() with carriage-return overwrite (painter path).""" text = 'hello\rworld ' * 20 diff --git a/tests/test_text_sizing.py b/tests/test_text_sizing.py new file mode 100644 index 0000000..b5e1808 --- /dev/null +++ b/tests/test_text_sizing.py @@ -0,0 +1,327 @@ +"""Tests for Text Sizing Protocol (OSC 66) support.""" + +# 3rd party +import 
pytest + +# local +from wcwidth import (TextSizing, + TextSizingParams, + clip, + width, + wcswidth, + iter_sequences, + strip_sequences) +from wcwidth.text_sizing import TEXT_FIELD_MAPPING +from wcwidth.escape_sequences import TEXT_SIZING_PATTERN + +_W_HI = TEXT_FIELD_MAPPING['w'].high +_N_HI = TEXT_FIELD_MAPPING['n'].high +_D_HI = TEXT_FIELD_MAPPING['d'].high + +CONTROL_CODES_PARAMS_CASES = [ + ('x=2', "", "Unknown text sizing field 'x' in "), + ('s=3:x=3', "s=3", "Unknown text sizing field 'x' in "), + ('s=2:x=3:w=9', f"s=2:w={_W_HI}", "Unknown text sizing field 'x' in "), + ('xyz=2', "", "Unknown text sizing field 'xyz' in "), + ('xxx', "", "Expected '=' in text sizing parameter"), + ('s=xxx', "", "Illegal text sizing value 'xxx' in "), + ('s=-99', "", "Out of bounds text sizing value '-99' in "), + ('s=99', f"s={_W_HI}", "Out of bounds text sizing value '99' in "), + ('w=-1', "", "Out of bounds text sizing value '-1' in "), + ('w=8', f"w={_W_HI}", "Out of bounds text sizing value '8' in "), + ('n=20', f"n={_N_HI}", "Out of bounds text sizing value '20' in "), + ('d=99', f"d={_D_HI}", "Out of bounds text sizing value '99' in "), + ('v=5', "v=2", "Out of bounds text sizing value '5' in "), + ('h=3', "h=2", "Out of bounds text sizing value '3' in "), +] + + +@pytest.mark.parametrize('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES) +def test_text_sizing_params_control_codes(given_params, expected_remainder, expected_exc): + """Verify control_codes='strict' and 'parse' behavior in TextSizingParams.from_params().""" + # assert control_codes='strict' raises expected exception, + with pytest.raises(ValueError) as exc_info: + TextSizingParams.from_params(given_params, control_codes='strict') + assert exc_info.value.args[0].startswith(expected_exc) + + # when 'parse' (default), any illegal argument or value is filtered, excluded, or clipped + params = TextSizingParams.from_params(given_params) + assert params.make_sequence() == 
expected_remainder + + +@pytest.mark.parametrize('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES) +def test_text_sizing_width_control_codes(given_params, expected_remainder, expected_exc): + """Verify control_codes='strict' with invalid OSC 66 sequences in wcwidth.width().""" + seq1 = '\x1b]66;' + given_params + ';ABC' + '\x07' + seq2 = '\x1b]66;' + given_params + ';ABC' + '\x1b\\' + for seq in (seq1, seq2): + with pytest.raises(ValueError) as exc_info: + width(seq, control_codes='strict') + assert exc_info.value.args[0].startswith(expected_exc) + + +@pytest.mark.parametrize('params,expected_repr', [ + (TextSizingParams(), 'TextSizingParams()'), + (TextSizingParams(scale=2, width=1), 'TextSizingParams(scale=2, width=1)'), + (TextSizingParams(scale=2, width=3, numerator=1, denominator=2, + vertical_align=1, horizontal_align=2), + 'TextSizingParams(scale=2, width=3, numerator=1, denominator=2, ' + 'vertical_align=1, horizontal_align=2)'), +]) +def test_text_sizing_params_repr(params, expected_repr): + """Verify TextSizingParams.__repr__ output.""" + assert repr(params) == expected_repr + + +@pytest.mark.parametrize('params,text,expected_width', [ + # cases of static width=N values, + (TextSizingParams(scale=2, width=1), 'climclam', 2), + (TextSizingParams(scale=2, width=3), 'anything', 6), + (TextSizingParams(scale=1, width=5), '', 5), + (TextSizingParams(scale=3, width=1), 'x', 3), + # and automatic width (width=0) values, + (TextSizingParams(), '', 0), + (TextSizingParams(), 'AB', 2), + (TextSizingParams(), '中', 2), + (TextSizingParams(scale=2), 'AB', 4), + (TextSizingParams(scale=2), '中', 4), + (TextSizingParams(scale=3), '', 0), + (TextSizingParams(scale=7, width=7, numerator=15, denominator=15, + vertical_align=2, horizontal_align=2), 'x!yzzy', 49), +]) +def test_text_sizing_width(params, text, expected_width): + """Verify measured width with both kinds of terminator.""" + # verify internal TextSizing.display_width() result, + assert 
TextSizing(params, text, terminator='\x07').display_width() == expected_width + assert TextSizing(params, text, terminator='\x1b\\').display_width() == expected_width + seq1 = TextSizing(params, text, terminator='\x07').make_sequence() + seq2 = TextSizing(params, text, terminator='\x1b\\').make_sequence() + + # verify round-trip + ts_match1, ts_match2 = TEXT_SIZING_PATTERN.match(seq1), TEXT_SIZING_PATTERN.match(seq2) + assert ts_match1 and ts_match2 + assert TextSizing.from_match(ts_match1) == TextSizing(params, text, terminator='\x07') + assert TextSizing.from_match(ts_match2) == TextSizing(params, text, terminator='\x1b\\') + + # and external width(), + assert width(seq1) == expected_width + assert width(seq2) == expected_width + + # verify 'strict' does not raise ValueError + width(seq1, control_codes='strict') + width(seq2, control_codes='strict') + + # and verify 'ignore' measures only inner_text (does not parse scale or width) + assert width(seq1, control_codes='ignore') == wcswidth(text) + assert width(seq2, control_codes='ignore') == wcswidth(text) + + +@pytest.mark.parametrize('given_sequence,expected_text,expected_params,expected_width', [ + ('\x1b]66;s=2:w=2;AB\x07', 'AB', 's=2:w=2', 4), + ('\x1b]66;s=2:w=2;\u4e2d\x07', '\u4e2d', 's=2:w=2', 4), + ('\x1b]66;s=3:w=1;x\x07', 'x', 's=3:w=1', 3), + ('\x1b]66;w=5;hello\x07', 'hello', 'w=5', 5), + ('\x1b]66;s=2:w=3;anything\x07', 'anything', 's=2:w=3', 6), + ('\x1b]66;w=3;x\x07', 'x', 'w=3', 3), + ('\x1b]66;s=1;AB\x07', 'AB', '', 2), + ('\x1b]66;s=2;AB\x07', 'AB', 's=2', 4), + ('\x1b]66;s=2;中\x07', '中', 's=2', 4), + ('\x1b]66;s=2;\x07', '', 's=2', 0), + ('\x1b]66;s=1:w=1;\x07', '', 'w=1', 1), + ('\x1b]66;w=2;A\x07', 'A', 'w=2', 2), + ('\x1b]66;s=2:w=3;text\x1b\\', 'text', 's=2:w=3', 6), +]) +def test_text_sizing_sequence(given_sequence, expected_text, expected_params, expected_width): + """Verify parsing and measured width of raw OSC 66 sequence.""" + ts_match = TEXT_SIZING_PATTERN.match(given_sequence) + 
assert ts_match is not None + text_size = TextSizing.from_match(ts_match) + assert text_size.params.make_sequence() == expected_params + assert text_size.text == expected_text + assert width(given_sequence, control_codes='parse') == expected_width + assert width(given_sequence, control_codes='strict') == expected_width + assert width(given_sequence, control_codes='ignore') == wcswidth(expected_text) + + +@pytest.mark.parametrize('text,expected', [ + ('\x1b]66;s=2:w=3:n=1:d=2:v=1:h=2;x!yzzy\x1b\\', 6), + ('\x1b]66;s=2:w=3;anything\x07', 6), + ('\x1b]66;w=3;x\x07', 3), + ('\x1b]66;s=1:w=0;AB\x07', 2), + ('\x1b]66;s=2:w=0;AB\x07', 4), + ('\x1b]66;s=2:w=0;\u4e2d\x07', 4), # '中' + ('\x1b]66;s=1:w=0;\x07', 0), + ('abc\x1b]66;w=3;x\x07def', 9), + ('\x1b]66;w=2;A\x07\x1b]66;w=3;B\x07', 5), + ('\x1b]66;s=2:w=3;text\x1b\\', 6), + ('\x1b[31m\x1b]66;w=2;AB\x07\x1b[0m', 2), +]) +def test_strings_with_text_sizing(text, expected): + """Verify measured width strings containing OSC66.""" + assert width(text) == expected + assert width(text, control_codes='strict') == expected + + +@pytest.mark.parametrize('text,expected', [ + ('\x1b]66;s=2;hello\x07', 'hello'), + ('\x1b]66;s=2;hello\x1b\\', 'hello'), + ('\x1b]66;;text\x07', 'text'), + ('\x1b]66;s=3:w=2;\x07', ''), + ('abc\x1b]66;w=2;XY\x07def', 'abcXYdef'), + ('\x1b[31m\x1b]66;s=2;red\x07\x1b[0m', 'red'), + ('\x1b]66;w=1;A\x07\x1b]66;w=1;B\x07', 'AB'), +]) +def test_strip_strings_with_text_sizing(text, expected): + assert strip_sequences(text) == expected + + +@pytest.mark.parametrize('text,expected_segs', [ + ('abc\x1b]66;s=2;hello\x07def', [('abc', False), ('\x1b]66;s=2;hello\x07', True), ('def', False)]), + ('abc\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\def', [('abc', False), ('\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\', True), ('def', False)]), +]) +def test_iter_sequences_text_sizing(text, expected_segs): + assert list(iter_sequences(text)) == expected_segs + + +@pytest.mark.parametrize('text,start,end,expected', [ + 
('\x1b]66;w=3;ABC\x07', 0, 3, '\x1b]66;w=3;ABC\x07'), + ('\x1b]66;w=3;ABC\x07', 0, 2, '\x1b]66;w=2;AB\x07'), + ('\x1b]66;w=3;ABC\x07', 1, 3, '\x1b]66;w=2;BC\x07'), + ('ab\x1b]66;w=2;XY\x07cd', 0, 6, 'ab\x1b]66;w=2;XY\x07cd'), + ('ab\x1b]66;w=2;XY\x07cd', 0, 3, 'ab\x1b]66;w=1;X\x07'), + ('ab\x1b]66;w=2;XY\x07cd', 3, 6, '\x1b]66;w=1;Y\x07cd'), + ('ab\x1b]66;w=2;XY\x07cd', 4, 6, 'cd'), +]) +def test_clip_text_sizing_basic(text, start, end, expected): + """Test basic support of clip() with text sizing sequence.""" + assert repr(clip(text, start, end)) == repr(expected) + + +@pytest.mark.parametrize('text,start,end,expected', [ + ('\x1b]66;s=2;ABC\x07', 0, 0, ''), + ('\x1b]66;s=2;ABC\x07', 6, 6, ''), + ('\x1b]66;s=2;ABC\x07', 0, 2, '\x1b]66;s=2;A\x07'), + ('\x1b]66;s=2;ABC\x07', 0, 4, '\x1b]66;s=2;AB\x07'), + ('\x1b]66;s=2;ABC\x07', 0, 6, '\x1b]66;s=2;ABC\x07'), + ('\x1b]66;s=2;ABC\x07', 2, 6, '\x1b]66;s=2;BC\x07'), + ('\x1b]66;s=2;ABC\x07', 4, 6, '\x1b]66;s=2;C\x07'), +]) +def test_clip_text_sizing_scaled(text, start, end, expected): + """Test support of clip() with scale=N arguments.""" + assert repr(clip(text, start, end)) == repr(expected) + + +@pytest.mark.parametrize('text,start,end,expected', [ + # a b c + # === === === + # 012 345 678 + # . + # .. + # *a* + # *a* . + # ... *b* + # ... *b* . + # ... *b* .. + # ... *b* *c* + ('\x1b]66;s=3;ABC\x07', 0, 0, ''), + ('\x1b]66;s=3;ABC\x07', 0, 1, '.'), + ('\x1b]66;s=3;ABC\x07', 0, 2, '..'), + ('\x1b]66;s=3;ABC\x07', 0, 3, '\x1b]66;s=3;A\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 4, '\x1b]66;s=3;A\x07.'), + ('\x1b]66;s=3;ABC\x07', 0, 5, '\x1b]66;s=3;A\x07..'), + ('\x1b]66;s=3;ABC\x07', 0, 6, '\x1b]66;s=3;AB\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 7, '\x1b]66;s=3;AB\x07.'), + ('\x1b]66;s=3;ABC\x07', 0, 8, '\x1b]66;s=3;AB\x07..'), + ('\x1b]66;s=3;ABC\x07', 0, 9, '\x1b]66;s=3;ABC\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 10, '\x1b]66;s=3;ABC\x07'), + # a b + # === === === + # 012 345 678 + # . 1, 2 + # .. 1, 3 + # .. . 1, 4 + # .. .. 
1, 5 + # .. *b* 1, 6 + # .. *b* . 1, 7 + # .. *b* .. 1, 8 + # .. *b* *c* 1, 9 + ('\x1b]66;s=3;ABC\x07', 1, 1, ''), + ('\x1b]66;s=3;ABC\x07', 1, 2, '.'), + ('\x1b]66;s=3;ABC\x07', 1, 3, '..'), + ('\x1b]66;s=3;ABC\x07', 1, 4, '...'), + ('\x1b]66;s=3;ABC\x07', 1, 5, '....'), + ('\x1b]66;s=3;ABC\x07', 1, 6, '..\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 1, 7, '..\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 1, 8, '..\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 1, 9, '..\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 1, 10, '..\x1b]66;s=3;BC\x07'), + # two-thirds of string 'A' and half of string 'B' is fillchar + # ('\x1b]66;s=3;ABC\x07', 2, 4, '..'), + # half of string 'A' and all of string 'B' + # a b + # === === === + # 012 345 678 + # . 2, 3 + # . . 2, 4 + # . .. 2, 5 + # . *b* 2, 6 + # . *b* . 2, 7 + # . *b* .. 2, 8 + # . *b* *c* 2, 9 + ('\x1b]66;s=3;ABC\x07', 2, 2, ''), + ('\x1b]66;s=3;ABC\x07', 2, 3, '.'), + ('\x1b]66;s=3;ABC\x07', 2, 4, '..'), + ('\x1b]66;s=3;ABC\x07', 2, 5, '...'), + ('\x1b]66;s=3;ABC\x07', 2, 6, '.\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 2, 7, '.\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 2, 8, '.\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 2, 9, '.\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 2, 10, '.\x1b]66;s=3;BC\x07'), + # and now 3:10, should be easy ... 
+ ('\x1b]66;s=3;ABC\x07', 3, 3, ''), + ('\x1b]66;s=3;ABC\x07', 3, 4, '.'), + ('\x1b]66;s=3;ABC\x07', 3, 5, '..'), + ('\x1b]66;s=3;ABC\x07', 3, 6, '\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 3, 7, '\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 3, 8, '\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 3, 9, '\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 3, 10, '\x1b]66;s=3;BC\x07'), +]) +def test_clip_text_sizing_scaled_with_fillchar(text, start, end, expected): + """Test support of clip() with scale=N and fillchar is needed to fill remainder.""" + assert repr(clip(text, start, end, fillchar='.')) == repr(expected) + + +def test_clip_simple_path_padding(): + """Simple-path clip with w=N larger than text length exercises padding loop.""" + # w=4 but only 1 grapheme 'X' — 3 empty units are padded. + # Clip window (0, 1) forces partial overlap, triggering + # _text_sizing_clip_simple's padding branch. + assert repr(clip('\x1b]66;w=4;X\x07', 0, 1)) == repr('\x1b]66;w=1;X\x07') + + +@pytest.mark.parametrize('text,start,end,expected', [ + # CR forces painter path; fully-visible text sizing sequence + ('\r\x1b]66;w=2;XY\x07', 0, 3, '\x1b]66;w=2;XY\x07'), + # CR painter path, text sizing partially clipped (first unit visible) + ('\r\x1b]66;w=2;XY\x07', 0, 1, '\x1b]66;w=1;X\x07'), + # BS forces painter path; text sizing fully visible + ('ab\b\b\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'), + # Painter path with partial text sizing overlap (exercises _text_sizing_clip_painter) + ('\ra\x1b]66;s=2;BC\x07', 0, 3, 'a\x1b]66;s=2;B\x07'), + # Painter path: text sizing scaled partial overlap with fillchar + ('\r\x1b]66;s=3;ABC\x07', 1, 6, ' \x1b]66;s=3;B\x07'), + # CSI movement + text sizing fully visible + ('ab\x1b[2D\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'), + # Painter path: text sizing entirely outside clip window (before start) + ('\r\x1b]66;w=2;XY\x07', 2, 4, ''), + # CR + text sizing with auto-width (w=0), partial overlap + ('\ra\x1b]66;s=2;BC\x07', 0, 5, 
'a\x1b]66;s=2;BC\x07'), + # Painter path: padding when w=N has more units than graphemes + ('\r\x1b]66;w=3;A\x07', 0, 2, '\x1b]66;w=2;A\x07'), + # Painter path: text sizing with unit entirely before clip window (skip path) + ('\r\x1b]66;s=2;ABCD\x07', 4, 8, '\x1b]66;s=2;CD\x07'), +]) +def test_clip_text_sizing_painter(text, start, end, expected): + """Test clip() with text sizing sequences in the cursor-movement (painter) path.""" + assert repr(clip(text, start, end)) == repr(expected) diff --git a/wcwidth/__init__.py b/wcwidth/__init__.py index 2bab0b5..7c893e9 100644 --- a/wcwidth/__init__.py +++ b/wcwidth/__init__.py @@ -21,6 +21,7 @@ from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH +from .text_sizing import TextSizing, TextSizingParams from .table_ambiguous import AMBIGUOUS_EASTASIAN from .escape_sequences import iter_sequences, strip_sequences from .unicode_versions import list_versions @@ -42,7 +43,8 @@ __all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes', 'iter_graphemes_reverse', 'grapheme_boundary_before', 'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences', - 'list_versions', 'propagate_sgr', 'Hyperlink', 'HyperlinkParams') + 'list_versions', 'propagate_sgr', 'Hyperlink', 'HyperlinkParams', + 'TextSizing', 'TextSizingParams') # Using 'hatchling', it does not seem to provide the pyproject.toml nicety, "dynamic = ['version']" # like flit_core, maybe there is some better way but for now we have to duplicate it in both places diff --git a/wcwidth/_clip.py b/wcwidth/_clip.py index aa26807..df67b63 100644 --- a/wcwidth/_clip.py +++ b/wcwidth/_clip.py @@ -3,8 +3,9 @@ # std imports import enum +from itertools import islice -from typing import Literal, Optional, NamedTuple +from typing import Literal, Callable, Optional, NamedTuple # local from ._width import width @@ -15,6 +16,7 @@ _sgr_state_update, _sgr_state_is_active, _sgr_state_to_sequence) +from 
.text_sizing import TextSizing, TextSizingParams from .escape_sequences import (_SEQUENCE_CLASSIFY, _HORIZONTAL_CURSOR_MOVEMENT, INDETERMINATE_EFFECT_SEQUENCE) @@ -272,6 +274,37 @@ def _clip_simple( idx = r.close_end continue + # OSC 66 Text Sizing. + if (ts_meta := m.group('ts_meta')) is not None: + ts_text = m.group('ts_text') + ts_term = m.group('ts_term') + assert ts_text is not None and ts_term is not None + ts = TextSizing( + TextSizingParams.from_params(ts_meta, control_codes=control_codes), + ts_text, ts_term) + ts_width = ts.display_width(ambiguous_width) + + if col >= start and col + ts_width <= end: + output.append(ts.make_sequence()) + if propagate_sgr and captured_style is None: + captured_style = current_style + col += ts_width + elif col < end and col + ts_width > start: + ts_parts: list[str] = [] + + def _ts_write(s: str, _w: int, _col: int) -> None: + ts_parts.append(s) + col = _text_sizing_clip( + ts, col, start, end, fillchar, ambiguous_width, + _ts_write) + output.extend(ts_parts) + if propagate_sgr and captured_style is None: + captured_style = current_style + else: + col += ts_width + idx = m.end() + continue + # Indeterminate-effect sequences: raise in strict mode. seq = m.group() if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq): @@ -322,6 +355,91 @@ def _clip_simple( return ''.join(output), captured_style +def _text_sizing_clip( + ts: TextSizing, + col: int, + start: int, + end: int, + fillchar: str, + ambiguous_width: int, + write_cells: Callable[[str, int, int], None], +) -> int: + """ + Emit tokens for a text-sizing (OSC 66) sequence, clipped to (start, end). + + Calls *write_cells(text, width, col)* for each emitted cell or sequence. Returns new column + position. 
+ """ + # pylint: disable=too-many-locals,too-many-branches,too-many-positional-arguments,too-complex + ts_width = ts.display_width(ambiguous_width) + + # Fully visible: emit entire sequence + if col >= start and col + ts_width <= end: + write_cells(ts.make_sequence(), ts_width, col) + return col + ts_width + # Fully outside: just advance column + if col >= end or col + ts_width <= start: + return col + ts_width + + # Partial overlap: decompose + rel_start = max(0, start - col) + rel_end = min(end, col + ts_width) - col + scale = ts.params.scale + + units: list[tuple[str, int]] = [] + if ts.params.width > 0: + for g in islice(iter_graphemes(ts.text), ts.params.width): + units.append((g, scale)) + for _ in range(ts.params.width - len(units)): + units.append(('', scale)) + else: + for g in iter_graphemes(ts.text): + units.append((g, width(g, ambiguous_width=ambiguous_width) * scale)) + + pending_units: list[tuple[str, int]] = [] + + def flush(flush_col: int) -> None: + if not pending_units: + return + texts = [u[0] for u in pending_units] + total_w = sum(u[1] for u in pending_units) + params = TextSizingParams( + scale, + len(texts) if ts.params.width > 0 else 0, + ts.params.numerator, ts.params.denominator, + ts.params.vertical_align, ts.params.horizontal_align) + write_cells( + TextSizing(params, ''.join(texts), ts.terminator).make_sequence(), + total_w, + flush_col) + pending_units.clear() + + flush_col_pos = col + rel_start + unit_pos = 0 + for unit_text, unit_w in units: + unit_end = unit_pos + unit_w + if unit_end <= rel_start: + unit_pos = unit_end + continue + if unit_pos >= rel_end: + break + + overlap = min(unit_end, rel_end) - max(unit_pos, rel_start) + if overlap == unit_w and unit_w > 0: + if not pending_units: + flush_col_pos = col + max(unit_pos, rel_start) + pending_units.append((unit_text, unit_w)) + else: + flush(flush_col_pos) + abs_start = col + max(unit_pos, rel_start) + for i in range(overlap): + write_cells(fillchar, 1, abs_start + i) + 
unit_pos = unit_end + + flush(flush_col_pos) + return col + ts_width + + def _clip_painter( text: str, start: int, @@ -437,6 +555,22 @@ def _write_cells(s: str, w: int, write_col: int, idx = r.close_end continue + # OSC 66 Text Sizing. + if (ts_meta := m.group('ts_meta')) is not None: + ts_text = m.group('ts_text') + ts_term = m.group('ts_term') + assert ts_text is not None and ts_term is not None + ts = TextSizing( + TextSizingParams.from_params(ts_meta, control_codes=control_codes), + ts_text, ts_term) + col = _text_sizing_clip( + ts, col, start, end, fillchar, ambiguous_width, + _write_cells) + if propagate_sgr and captured_style is None: + captured_style = current_style + idx = m.end() + continue + # Indeterminate-effect sequences: raise in strict mode. seq = m.group() if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq): @@ -549,7 +683,7 @@ def clip( overtyping: Optional[bool] = None, ) -> str: r""" - Clip text to display columns ``(start, end)`` while preserving all terminal sequences. + Clip text to display columns (start, end) while preserving all terminal sequences. This function extracts a substring based on visible column positions rather than character indices. Terminal escape sequences are preserved in the output since @@ -603,7 +737,7 @@ def clip( characters. Set to ``True`` to force the painter's algorithm (useful for testing). Has no effect when ``control_codes='ignore'``. - :returns: Substring of ``text`` spanning display columns ``(start, end)``, + :returns: Substring of ``text`` spanning display columns (start, end), with all terminal sequences preserved and wide characters at boundaries replaced with ``fillchar``. @@ -625,7 +759,7 @@ def clip( .. versionchanged:: 0.7.0 Added ``control_codes`` parameter (default 'parse'). - OSC 8 hyperlink-aware clipping. + OSC 8 hyperlink-aware clipping. OSC 66 text sizing protocol support. Added ``overtyping`` parameter (default None, auto-detect). 
Example:: diff --git a/wcwidth/_width.py b/wcwidth/_width.py index b8d7cda..b2a44c8 100644 --- a/wcwidth/_width.py +++ b/wcwidth/_width.py @@ -12,9 +12,11 @@ _FITZPATRICK_RANGE, _REGIONAL_INDICATOR_SET) from .table_vs16 import VS16_NARROW_TO_WIDE +from .text_sizing import TextSizing, TextSizingParams from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL from .table_grapheme import ISC_CONSONANT from .escape_sequences import (_SEQUENCE_CLASSIFY, + TEXT_SIZING_PATTERN, CURSOR_MOVEMENT_SEQUENCE, INDETERMINATE_EFFECT_SEQUENCE, strip_sequences) @@ -63,8 +65,8 @@ def width( - ``'parse'`` (default): Track horizontal cursor movement like BS ``\b``, CR ``\r``, TAB ``\t``, cursor left and right movement sequences. Vertical movement (LF, VT, FF) and - indeterminate terminal sequences are zero-width. OSC 8 Hyperlink, and many other kinds - of output sequences are parsed for displayed measurements. + indeterminate terminal sequences are zero-width. OSC 66 Kitty Text Sizing protocol, OSC 8 + Hyperlink, and many other kinds of output sequences are parsed for displayed measurements. - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with indeterminate results of the screen or cursor, like clear or vertical movement. Generally, these should be handled with a virtual terminal emulator (like 'pyte'). @@ -92,6 +94,11 @@ def width( (CSI D) would move beyond the beginning of the string. Previously, cursor-left was silently clamped to column 0 in all modes. + Support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or + backspace (``\b``) now overwrites text. ``column_address`` (``hpa``) and carriage return + (``\r``) are now parsed, and some values conditionally raise ``ValueError`` when + ``control_codes='parse'``. 
+ Examples:: >>> width('hello') @@ -125,8 +132,12 @@ def width( if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN: # Check for cursor-affecting control characters if '\b' not in text and '\t' not in text and '\r' not in text: - # Check for escape sequences - if none contain cursor movement - if '\x1b' not in text or not CURSOR_MOVEMENT_SEQUENCE.search(text): + # Check for escape sequences, if none contain cursor movement or + # text sizing, downgrade to 'ignore' + if '\x1b' not in text or ( + not CURSOR_MOVEMENT_SEQUENCE.search(text) + and not TEXT_SIZING_PATTERN.search(text) + ): control_codes = 'ignore' # Fast path for ignore mode, useful if you know the text is already free of control codes @@ -188,7 +199,16 @@ def width( f"exceeding string start" ) current_col = max(0, current_col - n_backward) - # 2d. SGR and other zero-width sequences -- no column advance + # 2d. OSC 66 Text Sizing — has positive display width + elif (ts_meta := m.group('ts_meta')) is not None: + ts_text = m.group('ts_text') + ts_term = m.group('ts_term') + assert ts_text is not None and ts_term is not None + text_size = TextSizing( + TextSizingParams.from_params(ts_meta, control_codes=control_codes), + ts_text, ts_term) + current_col += text_size.display_width(ambiguous_width) + # 2e. SGR and other zero-width sequences -- no column advance idx = m.end() # Escape sequences break VS16 adjacency: reset last-measured state last_measured_idx = -2 @@ -215,12 +235,12 @@ def width( # 3. 
Horizontal movement characters if char in HORIZONTAL_CTRL: - if char == '\x09' and tabsize > 0: # Tab + if char == '\t' and tabsize > 0: current_col += tabsize - (current_col % tabsize) - elif char == '\x08': # Backspace + elif char == '\b': if current_col > 0: current_col -= 1 - elif char == '\x0d': # Carriage return + elif char == '\r': if strict: raise ValueError( f"Horizontal movement character \\r at position {idx}: " diff --git a/wcwidth/escape_sequences.py b/wcwidth/escape_sequences.py index 9c296de..77d2b9b 100644 --- a/wcwidth/escape_sequences.py +++ b/wcwidth/escape_sequences.py @@ -14,12 +14,18 @@ # local from .sgr_state import _SGR_PATTERN +# Text Sizing Protocol (OSC 66), https://sw.kovidgoyal.net/kitty/text-sizing-protocol/ +TEXT_SIZING_PATTERN = re.compile( + r'\x1b\]66;([^;\x07\x1b]*);([^\x07\x1b]*)(\x07|\x1b\\)' +) + # Zero-width escape sequences (SGR, OSC, CSI, etc.). This table, like INDETERMINATE_EFFECT_SEQUENCE, # originated from the 'blessed' library. ZERO_WIDTH_PATTERN = re.compile( # CSI sequences r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]|' - # OSC sequences + # OSC sequences, note that text sizing protocol (OSC 66) is special case in width() and clip(), + # and contrary to the variable name, it is positive width. r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|' # APC sequences r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)|' @@ -66,6 +72,7 @@ + '|' + CURSOR_HPA_SEQUENCE.pattern.replace('(', '(?P', 1) + '|' + CURSOR_RIGHT_SEQUENCE.pattern.replace('(', '(?P', 1) + '|' + CURSOR_LEFT_SEQUENCE.pattern.replace('(', '(?P', 1) + + '|' + r'\x1b\]66;(?P[^;\x07\x1b]*);(?P[^\x07\x1b]*)(?P\x07|\x1b\\)' + '|' + r'(?P(?:' + ZERO_WIDTH_PATTERN.pattern + '))' ) @@ -166,6 +173,9 @@ def strip_sequences(text: str) -> str: .. versionadded:: 0.3.0 + .. versionchanged:: 0.7.0 + Inner text of OSC 66 (Text sizing protocol) is preserved. 
+ Example:: >>> strip_sequences('\x1b[31mred\x1b[0m') @@ -174,7 +184,11 @@ def strip_sequences(text: str) -> str: 'hello' >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text') 'bold red text' + >>> strip_sequences('\x1b]66;s=2;hello\x07') + 'hello' >>> strip_sequences('\x1b]8;id=34;https://example.com\x1b\\[view]\x1b]8;;\x1b\\') '[view]' """ + if '\x1b]66;' in text: + text = TEXT_SIZING_PATTERN.sub(r'\2', text) return ZERO_WIDTH_PATTERN.sub('', text) diff --git a/wcwidth/grapheme.py b/wcwidth/grapheme.py index cdfde22..87f61f5 100644 --- a/wcwidth/grapheme.py +++ b/wcwidth/grapheme.py @@ -266,9 +266,9 @@ def iter_graphemes( >>> list(iter_graphemes('cafe\u0301')) ['c', 'a', 'f', 'e\u0301'] - >>> list(iter_graphemes('\U0001F468\u200D\U0001F469\u200D\U0001F467')) + >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467')) ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467'] - >>> list(iter_graphemes('\U0001F1FA\U0001F1F8')) + >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8')) ['o', 'k', '\U0001F1FA\U0001F1F8'] .. versionadded:: 0.3.0 diff --git a/wcwidth/text_sizing.py b/wcwidth/text_sizing.py new file mode 100644 index 0000000..c8e8cb7 --- /dev/null +++ b/wcwidth/text_sizing.py @@ -0,0 +1,200 @@ +r""" +`kitty text sizing protocol`_ (OSC 66) parsing and measurement. + +The kitty text sizing protocol allows terminal apps to explicitly tell +terminals how many cells text occupies, using the escape sequence:: + + ESC ] 66 ; metadata ; text BEL/ST + +Metadata is colon-separated ``key=value`` pairs: + +- ``s``: scale +- ``w``: width in cells +- ``n``: fractional numerator +- ``d``: fractional denominator +- ``v``: vertical alignment +- ``h``: horizontal alignment + +Measuring the displayed width is straightforward: + +- When ``w > 0``, the sequence occupies ``s * w`` cells. +- Otherwise, when ``w == 0``, it occupies ``s * wcswidth(inner_text)`` cells. 
+ +Numerator, denominator, and alignment codes and values are parsed but otherwise ignored +and have no effect on measurements made in this library. + +.. _`kitty text sizing protocol`: https://sw.kovidgoyal.net/kitty/text-sizing-protocol/ + +.. versionadded:: 0.7.0 +""" + +from __future__ import annotations + +# std imports +import re + +import typing + +# local +from ._wcswidth import wcswidth + + +class _FieldMeta(typing.NamedTuple): + name: str + low: int + high: int + default: int + + +TEXT_FIELD_MAPPING: dict[str, _FieldMeta] = { + 's': _FieldMeta(name='scale', low=1, high=7, default=1), + 'w': _FieldMeta(name='width', low=0, high=7, default=0), + 'n': _FieldMeta(name='numerator', low=0, high=15, default=0), + 'd': _FieldMeta(name='denominator', low=0, high=15, default=0), + 'v': _FieldMeta(name='vertical_align', low=0, high=2, default=0), + 'h': _FieldMeta(name='horizontal_align', low=0, high=2, default=0)} + + +class TextSizingParams(typing.NamedTuple): + """ + Parsed parameters from a text sizing escape sequence (OSC 66). + + :param scale: Scale factor (1-7). Text occupies ``scale`` rows tall and ``scale * width`` + columns wide. + :param width: Width in cells (0-7). When 0, width is auto-calculated from the inner text. + :param numerator: Fractional scaling numerator (0-15). + :param denominator: Fractional scaling denominator (0-15). + :param vertical_align: Vertical alignment (0=top, 1=bottom, 2=center). + :param horizontal_align: Horizontal alignment (0=left, 1=right, 2=center). + """ + + scale: int = 1 + width: int = 0 + numerator: int = 0 + denominator: int = 0 + vertical_align: int = 0 + horizontal_align: int = 0 + + def __repr__(self) -> str: + """ + Return a compact representation including only non-default fields. + + This avoids verbose output when most fields are defaults. 
+ """ + # modified to show values only when non-default + repr_fmt = ', '.join(f'{field.name}={getattr(self, field.name)}' + for field in TEXT_FIELD_MAPPING.values() + if getattr(self, field.name) != field.default) + return f'{self.__class__.__name__}({repr_fmt})' + + def make_sequence(self) -> str: + """Build and return the metadata sub-part of an OSC 66 sequence.""" + parts = [] + # build string for all known parameters of non-default values + for field_key, field in TEXT_FIELD_MAPPING.items(): + if (val := getattr(self, field.name)) != field.default: + parts.append(f'{field_key}={val}') + return ':'.join(parts) + + @classmethod + def from_params(cls, raw: str, control_codes: str = 'parse') -> TextSizingParams: + """ + Parse colon-separated ``key=value`` metadata string. + + :param raw: Metadata string, e.g. ``'s=2:w=3'``. + :param control_codes: 'parse' or 'strict'. + :raises ValueError: When ``control_codes='strict'`` and a parameter is malformed, unknown, + non-integer, or out of bounds. + :returns: Parsed parameters with values clamped to valid ranges. + Unknown keys are ignored. Non-integer values use defaults.
+ + Example:: + + >>> # repr() shows only the non-default fields + >>> TextSizingParams.from_params('s=2:w=3') + TextSizingParams(scale=2, width=3) + """ + kwargs: typing.Dict[str, int] = {} + if not raw: + return cls() + for part in raw.split(':'): + if '=' not in part: + if control_codes == 'strict': + raise ValueError(f"Expected '=' in text sizing parameter (key=val), " + f"got {part!r} in OSC 66 sequence, {raw!r}") + continue + key, _eq, val = part.partition('=') + field = TEXT_FIELD_MAPPING.get(key) + if field is None: + if control_codes == 'strict': + raise ValueError(f"Unknown text sizing field '{key}' " + f"in OSC 66 sequence, {raw!r}") + # ignore unknown fields unless 'strict' + continue + try: + value = int(val) + except ValueError as exc: + if control_codes == 'strict': + raise ValueError(f"Illegal text sizing value '{val}' " + f"in OSC 66 sequence, {raw!r}: {exc}") from exc + # ignore value, uses default value without warning unless 'strict' + continue + if control_codes == 'strict' and (value > field.high or value < field.low): + raise ValueError(f"Out of bounds text sizing value '{val}' " + f"in OSC 66 sequence, {raw!r}: " + f"allowed range for '{key}' ({field.name}) " + f"is {field.low} to {field.high}") + kwargs[field.name] = max(field.low, min(field.high, value)) + return cls(**kwargs) + + +class TextSizing(typing.NamedTuple): + """Basic horizontal width measurement for kitty text sizing protocol.""" + + params: TextSizingParams + text: str + terminator: str + + @classmethod + def from_match(cls, match: re.Match[str], control_codes: str = 'parse') -> TextSizing: + r""" + Parse using matching OSC 66 Sequence. + + :param match: match object from :attr:`wcwidth.escape_sequences.TEXT_SIZING_PATTERN`. + :param control_codes: 'parse' or 'strict', same meaning as delegated by + :func:`wcwidth.width`. + :raises ValueError: When ``control_codes='strict'`` for unrecognized, invalid, or out of + bounds text sizing parameters.
+ :returns: TextSizing object from parsed sequence + + Example:: + + >>> from wcwidth.escape_sequences import TEXT_SIZING_PATTERN + >>> # repr() of the params shows only the non-default fields + >>> TextSizing.from_match(TEXT_SIZING_PATTERN.match('\x1b]66;w=2;XY\x07')) + TextSizing(params=TextSizingParams(width=2), text='XY', terminator='\x07') + """ + return cls(params=TextSizingParams.from_params(match.group(1), control_codes=control_codes), + text=match.group(2), + terminator=match.group(3)) + + def display_width(self, ambiguous_width: int = 1) -> int: + """ + Calculate the display width of a text sizing sequence. + + :param ambiguous_width: Width for East Asian Ambiguous characters. + :returns: Display width in terminal cells. When ``width > 0``, returns + ``params.scale * params.width``. When ``width == 0``, returns + ``params.scale * measured_inner_width``. + + .. note:: Fractional scaling (numerator/denominator) does not affect the + cell count, it adjusts only the font size within the cells allocated by 'w'. + """ + if self.params.width > 0: + return self.params.scale * self.params.width + w = wcswidth(self.text, ambiguous_width=ambiguous_width) + return self.params.scale * max(0, w) + + def make_sequence(self) -> str: + """Build and return complete OSC 66 Terminal Sequence.""" + return f'\x1b]66;{self.params.make_sequence()};{self.text}{self.terminator}'