diff --git a/src/main/java/org/apache/commons/codec/language/Metaphone.java b/src/main/java/org/apache/commons/codec/language/Metaphone.java index 53b5960b34..429bc2a92b 100644 --- a/src/main/java/org/apache/commons/codec/language/Metaphone.java +++ b/src/main/java/org/apache/commons/codec/language/Metaphone.java @@ -246,7 +246,11 @@ public String metaphone(final String txt) { if (isPreviousChar(local, n, 'S') && !isLastChar(wdsz, n) && FRONTV.indexOf(local.charAt(n + 1)) >= 0) { break; } - if (regionMatch(local, n, "CIA")) { // "CIA" -> X + if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH -> K; must be tested before the CH -> X rule below + code.append('K'); + break; + } + if (regionMatch(local, n, "CIA") || isNextChar(local, n, 'H')) { // "CIA" -> X, and CH (SCH was already handled above) -> X code.append('X'); break; } @@ -254,15 +258,7 @@ public String metaphone(final String txt) { code.append('S'); break; // CI,CE,CY -> S } - if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk - code.append('K'); - break; - } - if (!isNextChar(local, n, 'H') || n == 0 && wdsz >= 3 && isVowel(local, 2)) { // CH consonant -> K consonant - code.append('K'); - } else { - code.append('X'); // CHvowel -> X - } + code.append('K'); // any C not matched by a rule above -> K break; case 'D': if (!isLastChar(wdsz, n + 1) && isNextChar(local, n, 'G') && FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J diff --git a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java index 26dd0cc189..a5f43807b3 100644 --- a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java +++ b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java @@ -272,11 +272,28 @@ void testTIOAndTIAToX() { @Test void testTranslateOfSCHAndCH() { + assertEquals("SNS", getStringEncoder().metaphone("SCIENCE")); + assertEquals("SN", getStringEncoder().metaphone("SCENE")); + assertEquals("S", getStringEncoder().metaphone("SCY")); assertEquals("SKTL", 
getStringEncoder().metaphone("SCHEDULE")); assertEquals("SKMT", getStringEncoder().metaphone("SCHEMATIC")); - - assertEquals("KRKT", getStringEncoder().metaphone("CHARACTER")); + assertEquals("TSKR", getStringEncoder().metaphone("DISCHARGE")); /* SCH in mid-word is coded K */ + assertEquals("EX", getStringEncoder().metaphone("ECHO")); assertEquals("TX", getStringEncoder().metaphone("TEACH")); + assertEquals("XR", getStringEncoder().metaphone("CHERI")); + assertEquals("XP", getStringEncoder().metaphone("CHIP")); + assertEquals("XRST", getStringEncoder().metaphone("CHRIST")); /* CH followed by a consonant is still coded X */ + assertEquals("X", getStringEncoder().metaphone("CIAO")); + assertEquals("ST", getStringEncoder().metaphone("CITY")); + assertEquals("KT", getStringEncoder().metaphone("CAT")); + + } + + @Test + void testTranslateOfCHCharacterWithMaxCodeLenFive() { + final Metaphone metaphone = new Metaphone(); + metaphone.setMaxCodeLen(5); /* the expected code below has five letters; presumably the default limit is shorter, TODO confirm */ + assertEquals("XRKTR", metaphone.metaphone("CHARACTER")); } @Test