From 82af96227731cf61dee1d228383d28c09842e4a3 Mon Sep 17 00:00:00 2001 From: shalu Date: Wed, 11 Feb 2026 19:06:05 +0530 Subject: [PATCH 1/3] [CODEC-249] Fix Incorrect transform of CH digraph according to Metaphone basic rules --- .../commons/codec/language/Metaphone.java | 16 ++++++---------- .../commons/codec/language/MetaphoneTest.java | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/language/Metaphone.java b/src/main/java/org/apache/commons/codec/language/Metaphone.java index 53b5960b34..429bc2a92b 100644 --- a/src/main/java/org/apache/commons/codec/language/Metaphone.java +++ b/src/main/java/org/apache/commons/codec/language/Metaphone.java @@ -246,7 +246,11 @@ public String metaphone(final String txt) { if (isPreviousChar(local, n, 'S') && !isLastChar(wdsz, n) && FRONTV.indexOf(local.charAt(n + 1)) >= 0) { break; } - if (regionMatch(local, n, "CIA")) { // "CIA" -> X + if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk + code.append('K'); + break; + } + if (regionMatch(local, n, "CIA") || isNextChar(local, n, 'H')) { // "CIA" -> X or CH -> X code.append('X'); break; } @@ -254,15 +258,7 @@ public String metaphone(final String txt) { code.append('S'); break; // CI,CE,CY -> S } - if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk - code.append('K'); - break; - } - if (!isNextChar(local, n, 'H') || n == 0 && wdsz >= 3 && isVowel(local, 2)) { // CH consonant -> K consonant - code.append('K'); - } else { - code.append('X'); // CHvowel -> X - } + code.append('K'); // default C -> K break; case 'D': if (!isLastChar(wdsz, n + 1) && isNextChar(local, n, 'G') && FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J diff --git a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java index 26dd0cc189..491995605a 100644 --- 
a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java +++ b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java @@ -272,11 +272,27 @@ void testTIOAndTIAToX() { @Test void testTranslateOfSCHAndCH() { + assertEquals("SNS", getStringEncoder().metaphone("SCIENCE")); + assertEquals("SN", getStringEncoder().metaphone("SCENE")); + assertEquals("S", getStringEncoder().metaphone("SCY")); + assertEquals("SKTL", getStringEncoder().metaphone("SCHEDULE")); assertEquals("SKMT", getStringEncoder().metaphone("SCHEMATIC")); + assertEquals("TSKR", getStringEncoder().metaphone("DISCHARGE")); - assertEquals("KRKT", getStringEncoder().metaphone("CHARACTER")); + assertEquals("EX", getStringEncoder().metaphone("ECHO")); + + assertEquals("XRKT", getStringEncoder().metaphone("CHARACTER")); assertEquals("TX", getStringEncoder().metaphone("TEACH")); + + assertEquals("XR", getStringEncoder().metaphone("CHERI")); + assertEquals("XP", getStringEncoder().metaphone("CHIP")); + assertEquals("XRST", getStringEncoder().metaphone("CHRIST")); + + assertEquals("X", getStringEncoder().metaphone("CIAO")); + assertEquals("ST", getStringEncoder().metaphone("CITY")); + assertEquals("KT", getStringEncoder().metaphone("CAT")); + } @Test From a546baac3f7395fe7ef13621d48fd3708761ac45 Mon Sep 17 00:00:00 2001 From: shalu Date: Thu, 12 Feb 2026 20:26:55 +0530 Subject: [PATCH 2/3] [CODEC-249] Add. 
test for Metaphone CH character translation with max code length of 5 --- .../org/apache/commons/codec/language/MetaphoneTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java index 491995605a..b6c4e5998f 100644 --- a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java +++ b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java @@ -282,7 +282,6 @@ void testTranslateOfSCHAndCH() { assertEquals("EX", getStringEncoder().metaphone("ECHO")); - assertEquals("XRKT", getStringEncoder().metaphone("CHARACTER")); assertEquals("TX", getStringEncoder().metaphone("TEACH")); assertEquals("XR", getStringEncoder().metaphone("CHERI")); @@ -295,6 +294,13 @@ void testTranslateOfSCHAndCH() { } + @Test + void testTranslateOfCHCharacterWithMaxCodeLenFive() { + final Metaphone metaphone = new Metaphone(); + metaphone.setMaxCodeLen(5); + assertEquals("XRKTR", metaphone.metaphone("CHARACTER")); + } + @Test void testTranslateToJOfDGEOrDGIOrDGY() { assertEquals("TJ", getStringEncoder().metaphone("DODGY")); From d001404223d425504b4b8134575b6261de2e1f75 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 12 Feb 2026 14:14:03 -0500 Subject: [PATCH 3/3] Remove blank lines. 
--- .../org/apache/commons/codec/language/MetaphoneTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java index b6c4e5998f..a5f43807b3 100644 --- a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java +++ b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java @@ -275,19 +275,14 @@ void testTranslateOfSCHAndCH() { assertEquals("SNS", getStringEncoder().metaphone("SCIENCE")); assertEquals("SN", getStringEncoder().metaphone("SCENE")); assertEquals("S", getStringEncoder().metaphone("SCY")); - assertEquals("SKTL", getStringEncoder().metaphone("SCHEDULE")); assertEquals("SKMT", getStringEncoder().metaphone("SCHEMATIC")); assertEquals("TSKR", getStringEncoder().metaphone("DISCHARGE")); - assertEquals("EX", getStringEncoder().metaphone("ECHO")); - assertEquals("TX", getStringEncoder().metaphone("TEACH")); - assertEquals("XR", getStringEncoder().metaphone("CHERI")); assertEquals("XP", getStringEncoder().metaphone("CHIP")); assertEquals("XRST", getStringEncoder().metaphone("CHRIST")); - assertEquals("X", getStringEncoder().metaphone("CIAO")); assertEquals("ST", getStringEncoder().metaphone("CITY")); assertEquals("KT", getStringEncoder().metaphone("CAT"));