From 54dc9eb17d52395d3bf1096ecbb85d73c0bc9b71 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Fri, 22 May 2026 17:37:56 +0200 Subject: [PATCH] Fix appendReplacement validation in RE2J. Strictly validate replacement strings in Matcher.appendReplacement to match java.util.regex behavior. Throw IllegalArgumentException for trailing $ or \, invalid group references, and invalid named group syntax. Closes https://github.com/google/re2j/issues/196 --- java/com/google/re2j/Matcher.java | 61 ++++++++++-------- javatests/com/google/re2j/MatcherTest.java | 74 ++++++++++++++++++++++ 2 files changed, 108 insertions(+), 27 deletions(-) diff --git a/java/com/google/re2j/Matcher.java b/java/com/google/re2j/Matcher.java index 678186d0..ee93d3db 100644 --- a/java/com/google/re2j/Matcher.java +++ b/java/com/google/re2j/Matcher.java @@ -486,28 +486,37 @@ private void appendReplacementInternal(StringBuilder sb, String replacement) { int last = 0; int i = 0; int m = replacement.length(); - for (; i < m - 1; i++) { - if (replacement.charAt(i) == '\\') { + while (i < m) { + char c = replacement.charAt(i); + if (c == '\\') { if (last < i) { sb.append(replacement.substring(last, i)); } i++; + if (i == m) { + throw new IllegalArgumentException("character to be escaped is missing"); + } last = i; + i++; continue; } - if (replacement.charAt(i) == '$') { - int c = replacement.charAt(i + 1); - if ('0' <= c && c <= '9') { - int n = c - '0'; - if (last < i) { - sb.append(replacement.substring(last, i)); - } - for (i += 2; i < m; i++) { - c = replacement.charAt(i); - if (c < '0' || c > '9' || n * 10 + c - '0' > groupCount) { + if (c == '$') { + if (last < i) { + sb.append(replacement, last, i); + } + if (i + 1 >= m) { + throw new IllegalArgumentException("Illegal group reference: group index is missing"); + } + char next = replacement.charAt(i + 1); + if ('0' <= next && next <= '9') { + int n = next - '0'; + int j = i + 2; + for (; j < m; j++) { + char digit = replacement.charAt(j); + if (digit < '0' || digit > '9' || n * 10 + digit - '0' > groupCount) { break; } - n = n * 10 + c - '0'; + n = n * 10 + digit - '0'; } if (n > groupCount) { throw new IndexOutOfBoundsException("n > number of groups: " + n); @@ -516,28 +525,26 @@ private void appendReplacementInternal(StringBuilder sb, String replacement) { if (group != null) { sb.append(group); } + i = j; last = i; - i--; - continue; - } else if (c == '{') { - if (last < i) { - sb.append(replacement.substring(last, i)); - } - i++; // skip { - int j = i + 1; - while (j < replacement.length() - && replacement.charAt(j) != '}' - && replacement.charAt(j) != ' ') { + } else if (next == '{') { + int j = i + 2; + while (j < m && replacement.charAt(j) != '}') { j++; } - if (j == replacement.length() || replacement.charAt(j) != '}') { + if (j >= m) { throw new IllegalArgumentException("named capture group is missing trailing '}'"); } - String groupName = replacement.substring(i + 1, j); + String groupName = replacement.substring(i + 2, j); sb.append(group(groupName)); - last = j + 1; + i = j + 1; + last = i; + } else { + throw new IllegalArgumentException("Illegal group reference"); } + continue; } + i++; } if (last < m) { sb.append(replacement, last, m); diff --git a/javatests/com/google/re2j/MatcherTest.java b/javatests/com/google/re2j/MatcherTest.java index 4f24ae2e..8b0cbbb2 100644 --- a/javatests/com/google/re2j/MatcherTest.java +++ b/javatests/com/google/re2j/MatcherTest.java @@ -6,6 +6,7 @@ */ package com.google.re2j; +import static com.google.common.truth.Truth.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; @@ -524,4 +525,77 @@ public void testPatternLongestMatch() { assertEquals("aaa bbb", text.substring(matcher.start(), matcher.end())); } } + + @Test + public void testAppendReplacementValidation() { + Matcher m = Pattern.compile("ab").matcher("ab"); + assertTrue(m.find()); + + { + StringBuilder sb = new StringBuilder(); + + // Test invalid group references + try { + m.appendReplacement(sb, "$foo"); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("Illegal group reference"); + } + try { + m.appendReplacement(sb, "foo$"); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("Illegal group reference"); + } + try { + m.appendReplacement(sb, "foo$$bar"); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("Illegal group reference"); + } + + // Test invalid escapes + try { + m.appendReplacement(sb, "foo\\"); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("character to be escaped is missing"); + } + + // Test invalid named group syntax + try { + m.appendReplacement(sb, "foo${bar"); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("named capture group is missing trailing '}'"); + } + } + + { + // Test valid cases to ensure we didn't break them + StringBuilder sb = new StringBuilder(); + m.appendReplacement(sb, "foo\\$bar"); + assertEquals("foo$bar", sb.toString()); + + m.reset(); + assertTrue(m.find()); + sb = new StringBuilder(); + m.appendReplacement(sb, "foo\\\\bar"); + assertEquals("foo\\bar", sb.toString()); + + // Test valid group references + Matcher m2 = Pattern.compile("(?ab)").matcher("ab"); + assertTrue(m2.find()); + + sb = new StringBuilder(); + m2.appendReplacement(sb, "foo$1bar"); + assertEquals("fooabbar", sb.toString()); + + m2.reset(); + assertTrue(m2.find()); + sb = new StringBuilder(); + m2.appendReplacement(sb, "foo${groupName}bar"); + assertEquals("fooabbar", sb.toString()); + } + } }