diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7f27750..4339f3d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -72,6 +72,7 @@ jobs:
           - "3.12"
           - "3.13"
           - "3.14"
+          - "3.15"
 
     runs-on: ${{ matrix.os }}
     container: ${{ matrix.container }}
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
index 6cc76ae..6b120f5 100644
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@@ -20,7 +20,8 @@ jobs:
 
       - uses: actions/setup-python@v6
         with:
-          python-version: '3.14'
+          python-version: '3.15'
+          allow-prereleases: true
           cache: pip
 
       - name: Install dependencies
diff --git a/pyproject.toml b/pyproject.toml
index 5b04904..a93c3a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
     "Programming Language :: Python :: 3.14",
+    "Programming Language :: Python :: 3.15",
     "Topic :: Software Development :: Internationalization",
     "Topic :: Software Development :: Libraries",
     "Topic :: Software Development :: Localization",
diff --git a/wcwidth/grapheme.py b/wcwidth/grapheme.py
index 87f61f5..9342962 100644
--- a/wcwidth/grapheme.py
+++ b/wcwidth/grapheme.py
@@ -10,6 +10,8 @@
 from __future__ import annotations
 
 # std imports
+import sys
+import unicodedata
 from enum import IntEnum
 from functools import lru_cache
 
@@ -36,6 +38,12 @@
     # std imports
     from collections.abc import Iterator
 
+# check for python 3.15 for new iter_graphemes() function
+_HAS_PYTHON315_ITER_GRAPHEMES = (
+    sys.version_info >= (3, 15)
+    and hasattr(unicodedata, 'iter_graphemes')
+)
+
 # Maximum backward scan distance when finding grapheme cluster boundaries.
 # Covers all known Unicode grapheme clusters with margin; longer sequences are pathological.
 MAX_GRAPHEME_SCAN = 32
@@ -245,13 +253,13 @@ def _should_break(
     return BreakResult(should_break=True, ri_count=ri_count)
 
 
-def iter_graphemes(
+def _iter_graphemes_stdlib(
     unistr: str,
     start: int = 0,
     end: Optional[int] = None,
 ) -> Iterator[str]:
     r"""
-    Iterate over grapheme clusters in a Unicode string.
+    Iterate over grapheme clusters using :func:`unicodedata.iter_graphemes`.
 
     Grapheme clusters are "user-perceived characters" - what a user would
     consider a single character, which may consist of multiple Unicode
@@ -286,6 +294,30 @@ def iter_graphemes(
 
     end = min(end, length)
 
+    full_segment = unistr[start:end]
+    for seg in unicodedata.iter_graphemes(full_segment): # type: ignore[attr-defined] # pylint: disable=no-member
+        yield full_segment[seg.start:seg.end]
+
+
+def _iter_graphemes_python(
+    unistr: str,
+    start: int = 0,
+    end: int | None = None,
+) -> Iterator[str]:
+    """Pure-Python grapheme cluster iteration following UAX #29."""
+    if not unistr:
+        return
+
+    length = len(unistr)
+
+    if end is None:
+        end = length
+
+    if start >= end or start >= length:
+        return
+
+    end = min(end, length)
+
     # Track state for grapheme cluster boundaries
     cluster_start = start
     ri_count = 0
@@ -426,3 +458,10 @@ def iter_graphemes_reverse(
             break
         yield unistr[cluster_start:pos]
         pos = cluster_start
+
+
+# Bind iter_graphemes at module level to avoid per-call dispatch overhead.
+iter_graphemes = (
+    _iter_graphemes_stdlib if _HAS_PYTHON315_ITER_GRAPHEMES
+    else _iter_graphemes_python
+)