From 20900bc6190ef9222ce0c6ddae19d35a947d92f4 Mon Sep 17 00:00:00 2001 From: Simon Mavi Stewart Date: Thu, 2 Jul 2026 11:28:08 +0100 Subject: [PATCH 1/2] [java] Fix JSON parser EOF sentinel collision with U+FFFF Input used (char) -1 as its EOF sentinel. That value is 0xFFFF - a valid Unicode code unit that can legitimately appear in JSON. Any string containing it was mis-reported as an unterminated string. Switch peek()/read() to return int with -1 as the sentinel (matching Reader.read()) so the sentinel cannot collide with a valid UTF-16 code unit. --- java/src/org/openqa/selenium/json/Input.java | 19 ++++++++---- .../org/openqa/selenium/json/JsonInput.java | 31 +++++++++---------- .../openqa/selenium/json/JsonInputTest.java | 13 ++++++++ 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/java/src/org/openqa/selenium/json/Input.java b/java/src/org/openqa/selenium/json/Input.java index d1f639abb3c64..9a282811ca8ae 100644 --- a/java/src/org/openqa/selenium/json/Input.java +++ b/java/src/org/openqa/selenium/json/Input.java @@ -29,8 +29,13 @@ * read characters in the input buffer. */ class Input { - /** end-of-file indicator (0xFFFD) */ - public static final char EOF = (char) -1; // NOTE: Produces Unicode replacement character (0xFFFD) + /** + * End-of-input sentinel returned by {@link #peek()} and {@link #read()}. + * + *

<p>Value {@code -1} mirrors {@link java.io.Reader#read()} and — unlike a {@code char} sentinel + * — cannot collide with any valid UTF-16 code unit (including U+FFFF). + */ + public static final int EOF = -1; /** the number of chars to buffer */ private static final int BUFFER_SIZE = 4096; @@ -64,18 +69,20 @@ public Input(Reader source) { /** * Extract the next character from the input without consuming it. * - * @return the next input character; {@link #EOF} if input is exhausted + * @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if + * input is exhausted */ - public char peek() { + public int peek() { return fill() ? buffer[position + 1] : EOF; } /** * Read and consume the next character from the input. * - * @return the next input character; {@link #EOF} if input is exhausted + * @return the next input character as an unsigned UTF-16 code unit (0-65535); {@link #EOF} if + * input is exhausted */ - public char read() { + public int read() { return fill() ? buffer[++position] : EOF; } diff --git a/java/src/org/openqa/selenium/json/JsonInput.java b/java/src/org/openqa/selenium/json/JsonInput.java index 99c79373db482..4f4220f78fe0c 100644 --- a/java/src/org/openqa/selenium/json/JsonInput.java +++ b/java/src/org/openqa/selenium/json/JsonInput.java @@ -165,8 +165,8 @@ public JsonType peek() { return JsonType.END; default: - char c = input.read(); - throw new JsonException("Unable to determine type from: " + c + ". " + input); + int c = input.read(); + throw new JsonException("Unable to determine type from: " + (char) c + ". " + input); } } @@ -194,10 +194,10 @@ public String nextName() { String name = readString(); skipWhitespace(input); - char read = input.read(); + int read = input.read(); if (read != ':') { throw new JsonException( - "Unable to read name. 
Expected colon separator, but saw '" + (char) read + "'"); } return name; } @@ -241,13 +241,13 @@ public Number nextNumber() { case '7': case '8': case '9': - builder.append(input.read()); + builder.append((char) input.read()); break; case '.': case 'e': case 'E': mightBeDecimal = true; - builder.append(input.read()); + builder.append((char) input.read()); break; default: read = false; @@ -552,11 +552,11 @@ private void expect(JsonType type) { int toCompareLength = toCompare.length(); for (int i = 0; i < toCompareLength; i++) { - char read = input.read(); + int read = input.read(); if (read != toCompare.charAt(i)) { throw new JsonException( String.format( - "Unable to read %s. Saw %s at position %d. %s", toCompare, read, i, input)); + "Unable to read %s. Saw %s at position %d. %s", toCompare, (char) read, i, input)); } } @@ -574,9 +574,8 @@ private String readString() { input.read(); // Skip leading quote StringBuilder builder = new StringBuilder(); - char c; while (true) { - c = input.read(); + int c = input.read(); switch (c) { case Input.EOF: throw new JsonException("Unterminated string: " + builder + ". " + input); @@ -586,7 +585,7 @@ private String readString() { readEscape(builder); break; default: - builder.append(c); + builder.append((char) c); } } } @@ -601,7 +600,7 @@ private String readString() { */ // FIXME: This function doesn't appear to support UTF-8 or UTF-32. private void readEscape(StringBuilder builder) { - char read = input.read(); + int read = input.read(); // List from: https://tools.ietf.org/html/rfc7159.html#section-7 switch (read) { @@ -629,10 +628,10 @@ private void readEscape(StringBuilder builder) { int result = 0; int multiplier = 4096; // (16 * 16 * 16) as we start from the thousands and work to units. for (int i = 0; i < 4; i++) { - char c = input.read(); + int c = input.read(); int digit = Character.digit(c, 16); if (digit == -1) { - throw new JsonException(c + " is not a hexadecimal digit. 
" + input); + throw new JsonException((char) c + " is not a hexadecimal digit. " + input); } result += digit * multiplier; multiplier /= 16; @@ -643,11 +642,11 @@ private void readEscape(StringBuilder builder) { case '/': case '\\': case '"': - builder.append(read); + builder.append((char) read); break; default: - throw new JsonException("Unexpected escape code: " + read + ". " + input); + throw new JsonException("Unexpected escape code: " + (char) read + ". " + input); } } diff --git a/java/test/org/openqa/selenium/json/JsonInputTest.java b/java/test/org/openqa/selenium/json/JsonInputTest.java index 289a5f8bbf4d9..2c8dd1188c5c5 100644 --- a/java/test/org/openqa/selenium/json/JsonInputTest.java +++ b/java/test/org/openqa/selenium/json/JsonInputTest.java @@ -291,6 +291,19 @@ void shouldBeAbleToReadNonWellFormedDataLongerThanReadBuffer() { } } + @Test + void shouldReadU_FFFF_AsALiteralCharacterAndNotEndOfInput() { + // U+FFFF is a valid (non-)character; historically it collided with an in-band EOF sentinel + // and was mis-reported as an unterminated string. + try (JsonInput input = newInput("\"a￿b\"")) { + assertThat(input.nextString()).isEqualTo("a￿b"); + } + + try (JsonInput input = newInput("\"\\uFFFF\"")) { + assertThat(input.nextString()).isEqualTo("￿"); + } + } + @Test void nullInputsShouldCoerceAsNullValues() throws IOException { try (InputStream is = new ByteArrayInputStream(new byte[0]); From de2ada027f4db1739cfbc4b20b9ec1aac5fb415d Mon Sep 17 00:00:00 2001 From: Simon Mavi Stewart Date: Thu, 2 Jul 2026 12:30:24 +0100 Subject: [PATCH 2/2] [java] Construct U+FFFF payload at runtime instead of embedding it Avoids depending on the source file's byte encoding when reading the test. 
--- .../org/openqa/selenium/json/JsonInputTest.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/java/test/org/openqa/selenium/json/JsonInputTest.java b/java/test/org/openqa/selenium/json/JsonInputTest.java index 2c8dd1188c5c5..73245c604e28a 100644 --- a/java/test/org/openqa/selenium/json/JsonInputTest.java +++ b/java/test/org/openqa/selenium/json/JsonInputTest.java @@ -293,14 +293,19 @@ void shouldBeAbleToReadNonWellFormedDataLongerThanReadBuffer() { @Test void shouldReadU_FFFF_AsALiteralCharacterAndNotEndOfInput() { - // U+FFFF is a valid (non-)character; historically it collided with an in-band EOF sentinel - // and was mis-reported as an unterminated string. - try (JsonInput input = newInput("\"a￿b\"")) { - assertThat(input.nextString()).isEqualTo("a￿b"); + // U+FFFF is a valid Unicode code unit that historically collided with the in-band EOF + // sentinel and was mis-reported as an unterminated string. Build the strings from + // char values rather than embedding literal U+FFFF so the test is independent of the + // source file's byte encoding. + char nonChar = (char) 0xFFFF; + String literalPayload = "a" + nonChar + "b"; + + try (JsonInput input = newInput("\"" + literalPayload + "\"")) { + assertThat(input.nextString()).isEqualTo(literalPayload); } try (JsonInput input = newInput("\"\\uFFFF\"")) { - assertThat(input.nextString()).isEqualTo("￿"); + assertThat(input.nextString()).isEqualTo(String.valueOf(nonChar)); } }