From fa4ae9626f444b61119f794d0b6070f9841ac1af Mon Sep 17 00:00:00 2001 From: Anton Rybochkin Date: Fri, 10 Apr 2026 01:51:04 +0200 Subject: [PATCH] Fixed asymmetric behavior for case-insensitive matches --- java/com/google/re2j/UnicodeTables.java | 115 +++++++++++++++--- javatests/com/google/re2j/MatcherTest.java | 8 ++ .../google/re2j/UnicodeTablesGenerator.java | 2 +- 3 files changed, 108 insertions(+), 17 deletions(-) diff --git a/java/com/google/re2j/UnicodeTables.java b/java/com/google/re2j/UnicodeTables.java index 8bd827fb..8b5ef98a 100644 --- a/java/com/google/re2j/UnicodeTables.java +++ b/java/com/google/re2j/UnicodeTables.java @@ -1,10 +1,10 @@ /* - * Copyright (c) 2020 The Go Authors. All rights reserved. + * Copyright (c) 2026 The Go Authors. All rights reserved. * * Use of this source code is governed by a BSD-style * license that can be found in the LICENSE file. */ -// Generated at 2020-06-09T16:20:06.352Z by Java 1.8.0_181 using Unicode version 6.0.0.0. +// Generated at 2026-04-09T23:48:18.149Z by Java 1.8.0_472 using Unicode version 6.0.0.0. // Do not change this file, your edits will be lost. Instead change UnicodeTablesGenerator.java. package com.google.re2j; @@ -4189,20 +4189,103 @@ private static int[][] make_foldLu() { private static int[][] make_foldLl() { return new int[][] { - {0x004B, 0x0053, 8}, - {0x00C5, 0x01C4, 255}, - {0x01C5, 0x01C7, 2}, - {0x01C8, 0x01CA, 2}, - {0x01CB, 0x01F1, 38}, - {0x01F2, 0x0345, 339}, - {0x0392, 0x0398, 3}, - {0x0399, 0x039A, 1}, - {0x039C, 0x03A0, 4}, - {0x03A1, 0x03A3, 2}, - {0x03A6, 0x03A9, 3}, - {0x03F4, 0x1E60, 6764}, - {0x1E9E, 0x2126, 648}, - {0x212A, 0x212B, 1} + {0x0041, 0x005A, 1}, + {0x00C0, 0x00D6, 1}, + {0x00D8, 0x00DE, 1}, + {0x0100, 0x012E, 2}, + {0x0132, 0x0136, 2}, + {0x0139, 0x0147, 2}, + {0x014A, 0x0178, 2}, + {0x0179, 0x017D, 2}, + {0x0181, 0x0182, 1}, + {0x0184, 0x0186, 2}, + {0x0187, 0x0189, 2}, + {0x018A, 0x018B, 1}, + {0x018E, 0x0191, 1}, + {0x0193, 0x0194, 1}, + {0x0196, 0x0198, 1}, + {0x019C, 0x019D, 1}, + {0x019F, 0x01A0, 1}, + {0x01A2, 0x01A6, 2}, + {0x01A7, 0x01A9, 2}, + {0x01AC, 0x01AE, 2}, + {0x01AF, 0x01B1, 2}, + {0x01B2, 0x01B3, 1}, + {0x01B5, 0x01B7, 2}, + {0x01B8, 0x01BC, 4}, + {0x01C4, 0x01C5, 1}, + {0x01C7, 0x01C8, 1}, + {0x01CA, 0x01CB, 1}, + {0x01CD, 0x01DB, 2}, + {0x01DE, 0x01EE, 2}, + {0x01F1, 0x01F2, 1}, + {0x01F4, 0x01F6, 2}, + {0x01F7, 0x01F8, 1}, + {0x01FA, 0x0232, 2}, + {0x023A, 0x023B, 1}, + {0x023D, 0x023E, 1}, + {0x0241, 0x0243, 2}, + {0x0244, 0x0246, 1}, + {0x0248, 0x024E, 2}, + {0x0345, 0x0370, 43}, + {0x0372, 0x0376, 4}, + {0x0386, 0x0388, 2}, + {0x0389, 0x038A, 1}, + {0x038C, 0x038E, 2}, + {0x038F, 0x0391, 2}, + {0x0392, 0x03A1, 1}, + {0x03A3, 0x03AB, 1}, + {0x03CF, 0x03D8, 9}, + {0x03DA, 0x03EE, 2}, + {0x03F4, 0x03F7, 3}, + {0x03F9, 0x03FA, 1}, + {0x03FD, 0x042F, 1}, + {0x0460, 0x0480, 2}, + {0x048A, 0x04C0, 2}, + {0x04C1, 0x04CD, 2}, + {0x04D0, 0x0526, 2}, + {0x0531, 0x0556, 1}, + {0x10A0, 0x10C5, 1}, + {0x1E00, 0x1E94, 2}, + {0x1E9E, 0x1EFE, 2}, + {0x1F08, 0x1F0F, 1}, + {0x1F18, 0x1F1D, 1}, + {0x1F28, 0x1F2F, 1}, + {0x1F38, 0x1F3F, 1}, + {0x1F48, 0x1F4D, 1}, + {0x1F59, 0x1F5F, 2}, + {0x1F68, 0x1F6F, 1}, + {0x1F88, 0x1F8F, 1}, + {0x1F98, 0x1F9F, 1}, + {0x1FA8, 0x1FAF, 1}, + {0x1FB8, 0x1FBC, 1}, + {0x1FC8, 0x1FCC, 1}, + {0x1FD8, 0x1FDB, 1}, + {0x1FE8, 0x1FEC, 1}, + {0x1FF8, 0x1FFC, 1}, + {0x2126, 0x212A, 4}, + {0x212B, 0x2132, 7}, + {0x2183, 0x2C00, 2685}, + {0x2C01, 0x2C2E, 1}, + {0x2C60, 0x2C62, 2}, + {0x2C63, 0x2C64, 1}, + {0x2C67, 0x2C6D, 2}, + {0x2C6E, 0x2C70, 1}, + {0x2C72, 0x2C75, 3}, + {0x2C7E, 0x2C80, 1}, + {0x2C82, 0x2CE2, 2}, + {0x2CEB, 0x2CED, 2}, + {0xA640, 0xA66C, 2}, + {0xA680, 0xA696, 2}, + {0xA722, 0xA72E, 2}, + {0xA732, 0xA76E, 2}, + {0xA779, 0xA77D, 2}, + {0xA77E, 0xA786, 2}, + {0xA78B, 0xA78D, 2}, + {0xA790, 0xA7A0, 16}, + {0xA7A2, 0xA7A8, 2}, + {0xFF21, 0xFF3A, 1}, + {0x10400, 0x10427, 1} }; } diff --git a/javatests/com/google/re2j/MatcherTest.java b/javatests/com/google/re2j/MatcherTest.java index 4f24ae2e..dd847af0 100644 --- a/javatests/com/google/re2j/MatcherTest.java +++ b/javatests/com/google/re2j/MatcherTest.java @@ -524,4 +524,12 @@ public void testPatternLongestMatch() { assertEquals("aaa bbb", text.substring(matcher.start(), matcher.end())); } } + + @Test + public void testMatchCategoryCaseInsensitive() { + assertTrue(Pattern.compile("(?i)\\p{Ll}").matches("A")); + assertTrue(Pattern.compile("(?i)\\p{Ll}").matches("a")); + assertTrue(Pattern.compile("(?i)\\p{Lu}").matches("A")); + assertTrue(Pattern.compile("(?i)\\p{Lu}").matches("a")); + } } diff --git a/unicode/src/main/java/com/google/re2j/UnicodeTablesGenerator.java b/unicode/src/main/java/com/google/re2j/UnicodeTablesGenerator.java index 324dc219..865fb689 100644 --- a/unicode/src/main/java/com/google/re2j/UnicodeTablesGenerator.java +++ b/unicode/src/main/java/com/google/re2j/UnicodeTablesGenerator.java @@ -363,7 +363,7 @@ private boolean addFoldExceptions(String name, Set codepointClass) { for (int codepoint : codepointClass) { if (!sortedOrbits.containsKey(codepoint)) { // Just uppercase and lowercase. - int u = UCharacter.toLowerCase(codepoint); + int u = UCharacter.toUpperCase(codepoint); if (u != codepoint) { exceptionCodepoints.add(u); }