Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ jobs:
- "3.12"
- "3.13"
- "3.14"
- "3.15"

runs-on: ${{ matrix.os }}
container: ${{ matrix.container }}
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/codspeed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ jobs:

- uses: actions/setup-python@v6
with:
python-version: '3.14'
python-version: '3.15'
allow-prereleases: true
cache: pip

- name: Install dependencies
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: 3.15",
"Topic :: Software Development :: Internationalization",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Localization",
Expand Down
43 changes: 41 additions & 2 deletions wcwidth/grapheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from __future__ import annotations

# std imports
import sys
import unicodedata
from enum import IntEnum
from functools import lru_cache

Expand All @@ -36,6 +38,12 @@
# std imports
from collections.abc import Iterator

# Feature probe, evaluated once at import time: True only when running on
# Python 3.15 or newer AND the unicodedata module exposes iter_graphemes().
_HAS_PYTHON315_ITER_GRAPHEMES = (
    hasattr(unicodedata, 'iter_graphemes')
    and sys.version_info >= (3, 15)
)

# Maximum backward scan distance when finding grapheme cluster boundaries.
# Covers all known Unicode grapheme clusters with margin; longer sequences are pathological.
MAX_GRAPHEME_SCAN = 32
Expand Down Expand Up @@ -245,13 +253,13 @@ def _should_break(
return BreakResult(should_break=True, ri_count=ri_count)


def iter_graphemes(
def _iter_graphemes_stdlib(
unistr: str,
start: int = 0,
end: Optional[int] = None,
) -> Iterator[str]:
r"""
Iterate over grapheme clusters in a Unicode string.
Iterate over grapheme clusters using :func:`unicodedata.iter_graphemes`.

Grapheme clusters are "user-perceived characters" - what a user would
consider a single character, which may consist of multiple Unicode
Expand Down Expand Up @@ -286,6 +294,30 @@ def iter_graphemes(

end = min(end, length)

full_segment = unistr[start:end]
for seg in unicodedata.iter_graphemes(full_segment): # type: ignore[attr-defined] # pylint: disable=no-member
yield full_segment[seg.start:seg.end]


def _iter_graphemes_python(
unistr: str,
start: int = 0,
end: int | None = None,
) -> Iterator[str]:
"""Pure-Python grapheme cluster iteration following UAX #29."""
if not unistr:
return

length = len(unistr)

if end is None:
end = length

if start >= end or start >= length:
return

end = min(end, length)

# Track state for grapheme cluster boundaries
cluster_start = start
ri_count = 0
Expand Down Expand Up @@ -426,3 +458,10 @@ def iter_graphemes_reverse(
break
yield unistr[cluster_start:pos]
pos = cluster_start


# Choose the iter_graphemes implementation once, when the module is imported,
# so callers never pay for a per-call capability check.
if _HAS_PYTHON315_ITER_GRAPHEMES:
    iter_graphemes = _iter_graphemes_stdlib
else:
    iter_graphemes = _iter_graphemes_python
Loading