From dfc4d305b93c1e2b3ea3eddf47ed955fd689cd0f Mon Sep 17 00:00:00 2001 From: wvell Date: Fri, 1 Aug 2025 10:23:36 +0200 Subject: [PATCH] =?UTF-8?q?Use=20the=20PCRE-escaped=20non-breaking=20space?= =?UTF-8?q?=20so=20the=20/x=20modifier=20doesn=E2=80=99t=20strip=20it=20ou?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the code injected a raw NBSP byte (`"\xC2\xA0"`) into the patterns, but under PCRE2’s extended mode (`/x`) and certain locales (notably on macOS after a `setlocale()` call), that literal byte gets treated as ignorable "whitespace" in the pattern and is dropped at compile time. This causes regexes to collapse into zero-length matches. Switching to the PCRE escape `\x{00A0}` ensures the NBSP code point remains in the pattern text, and the regex reliably matches actual non-breaking spaces across platforms and locales. --- src/fixes/node-fixes/class-space-collapse-fix.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fixes/node-fixes/class-space-collapse-fix.php b/src/fixes/node-fixes/class-space-collapse-fix.php index 11a3b22..158a3fc 100644 --- a/src/fixes/node-fixes/class-space-collapse-fix.php +++ b/src/fixes/node-fixes/class-space-collapse-fix.php @@ -41,8 +41,11 @@ */ class Space_Collapse_Fix extends Abstract_Node_Fix { + // Use the PCRE-escaped non-breaking space so the /x modifier doesn’t strip it out. + const PCRE_ESCAPED_NO_BREAK_SPACE = "\\x{00A0}"; + const COLLAPSE_NORMAL_SPACES = '/[' . RE::NORMAL_SPACES . ']+/Sxu'; - const COLLAPSE_NON_BREAKABLE_SPACES = '/(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*' . U::NO_BREAK_SPACE . '(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*/Sxu'; + const COLLAPSE_NON_BREAKABLE_SPACES = '/(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*' . self::PCRE_ESCAPED_NO_BREAK_SPACE . '(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*/Sxu'; const COLLAPSE_OTHER_SPACES = '/(?:[' . 
RE::NORMAL_SPACES . '])*(' . RE::HTML_SPACES . ')(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')*/Sxu'; const COLLAPSE_SPACES_AT_START_OF_BLOCK = '/\A(?:[' . RE::NORMAL_SPACES . ']|' . RE::HTML_SPACES . ')+/Sxu';