From ddbc9cd185caf3f4f25fc558642141474b4ced99 Mon Sep 17 00:00:00 2001 From: Robert Nagy Date: Sun, 22 Mar 2026 10:53:00 +0100 Subject: [PATCH] buffer: add end parameter To limit how far into the buffer we search without allocating an unnecessary subarray. PR-URL: https://github.com/nodejs/node/pull/62390 --- doc/api/buffer.md | 25 +++++++-- lib/buffer.js | 75 ++++++++++++++++--------- src/node_buffer.cc | 83 ++++++++++++++++++++-------- test/parallel/test-buffer-indexof.js | 58 +++++++++++++++++++ typings/internalBinding/buffer.d.ts | 6 +- 5 files changed, 190 insertions(+), 57 deletions(-) diff --git a/doc/api/buffer.md b/doc/api/buffer.md index 329d1f3f9915df..342701d1d6275f 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -2069,11 +2069,14 @@ console.log(buf.fill('zz', 'hex')); // Throws an exception. ``` -### `buf.includes(value[, byteOffset][, encoding])` +### `buf.includes(value[, start[, end]][, encoding])` * `value` {string|Buffer|Uint8Array|integer} What to search for. -* `byteOffset` {integer} Where to begin searching in `buf`. If negative, then +* `start` {integer} Where to begin searching in `buf`. If negative, then offset is calculated from the end of `buf`. **Default:** `0`. +* `end` {integer} Where to stop searching in `buf` (exclusive). **Default:** + `buf.length`. * `encoding` {string} If `value` is a string, this is the encoding used to determine the binary representation of the string that will be searched for in `buf`. **Default:** `'utf8'`. @@ -2310,20 +2320,25 @@ for (const key of buf.keys()) { // 5 ``` -### `buf.lastIndexOf(value[, byteOffset][, encoding])` +### `buf.lastIndexOf(value[, start[, end]][, encoding])` * `value` {string|Buffer|Uint8Array|integer} What to search for. -* `byteOffset` {integer} Where to begin searching in `buf`. If negative, then +* `start` {integer} Where to begin searching in `buf`. If negative, then offset is calculated from the end of `buf`. **Default:** `buf.length - 1`. 
+* `end` {integer} Exclusive upper bound of the search range in `buf`; a match + must lie entirely before this index. **Default:** `buf.length`. * `encoding` {string} If `value` is a string, this is the encoding used to determine the binary representation of the string that will be searched for in `buf`. **Default:** `'utf8'`. diff --git a/lib/buffer.js b/lib/buffer.js index cf9e0ca50c3d7e..22344dfabc6e3f 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -695,8 +695,8 @@ const encodingOps = { byteLength: byteLengthUtf8, write: utf8Write, slice: utf8Slice, - indexOf: (buf, val, byteOffset, dir) => - indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir), + indexOf: (buf, val, byteOffset, dir, end) => + indexOfString(buf, val, byteOffset, encodingsMap.utf8, dir, end), }, ucs2: { encoding: 'ucs2', @@ -704,8 +704,8 @@ const encodingOps = { byteLength: (string) => string.length * 2, write: ucs2Write, slice: ucs2Slice, - indexOf: (buf, val, byteOffset, dir) => - indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir), + indexOf: (buf, val, byteOffset, dir, end) => + indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir, end), }, utf16le: { encoding: 'utf16le', @@ -713,8 +713,8 @@ const encodingOps = { byteLength: (string) => string.length * 2, write: ucs2Write, slice: ucs2Slice, - indexOf: (buf, val, byteOffset, dir) => - indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir), + indexOf: (buf, val, byteOffset, dir, end) => + indexOfString(buf, val, byteOffset, encodingsMap.utf16le, dir, end), }, latin1: { encoding: 'latin1', @@ -722,8 +722,8 @@ const encodingOps = { byteLength: (string) => string.length, write: latin1Write, slice: latin1Slice, - indexOf: (buf, val, byteOffset, dir) => - indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir), + indexOf: (buf, val, byteOffset, dir, end) => + indexOfString(buf, val, byteOffset, encodingsMap.latin1, dir, end), }, ascii: { encoding: 'ascii', @@ -731,12 +731,13 @@ const encodingOps = { byteLength: (string) => string.length, write: asciiWrite, 
slice: asciiSlice, - indexOf: (buf, val, byteOffset, dir) => + indexOf: (buf, val, byteOffset, dir, end) => indexOfBuffer(buf, fromStringFast(val, encodingOps.ascii), byteOffset, encodingsMap.ascii, - dir), + dir, + end), }, base64: { encoding: 'base64', @@ -744,12 +745,13 @@ const encodingOps = { byteLength: (string) => base64ByteLength(string, string.length), write: base64Write, slice: base64Slice, - indexOf: (buf, val, byteOffset, dir) => + indexOf: (buf, val, byteOffset, dir, end) => indexOfBuffer(buf, fromStringFast(val, encodingOps.base64), byteOffset, encodingsMap.base64, - dir), + dir, + end), }, base64url: { encoding: 'base64url', @@ -757,12 +759,13 @@ const encodingOps = { byteLength: (string) => base64ByteLength(string, string.length), write: base64urlWrite, slice: base64urlSlice, - indexOf: (buf, val, byteOffset, dir) => + indexOf: (buf, val, byteOffset, dir, end) => indexOfBuffer(buf, fromStringFast(val, encodingOps.base64url), byteOffset, encodingsMap.base64url, - dir), + dir, + end), }, hex: { encoding: 'hex', @@ -770,12 +773,13 @@ const encodingOps = { byteLength: (string) => string.length >>> 1, write: hexWrite, slice: hexSlice, - indexOf: (buf, val, byteOffset, dir) => + indexOf: (buf, val, byteOffset, dir, end) => indexOfBuffer(buf, fromStringFast(val, encodingOps.hex), byteOffset, encodingsMap.hex, - dir), + dir, + end), }, }; function getEncodingOps(encoding) { @@ -1029,9 +1033,10 @@ Buffer.prototype.compare = function compare(target, // - buffer - a Buffer to search // - val - a string, Buffer, or number // - byteOffset - an index into `buffer`; will be clamped to an int32 +// - end - absolute exclusive end of the search range // - encoding - an optional encoding, relevant if val is a string // - dir - true for indexOf, false for lastIndexOf -function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { +function bidirectionalIndexOf(buffer, val, byteOffset, end, encoding, dir) { validateBuffer(buffer); if (typeof byteOffset === 
'string') { @@ -1051,7 +1056,7 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { dir = !!dir; // Cast to bool. if (typeof val === 'number') - return indexOfNumber(buffer, val >>> 0, byteOffset, dir); + return indexOfNumber(buffer, val >>> 0, byteOffset, dir, end); let ops; if (encoding === undefined) @@ -1062,13 +1067,13 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { if (typeof val === 'string') { if (ops === undefined) throw new ERR_UNKNOWN_ENCODING(encoding); - return ops.indexOf(buffer, val, byteOffset, dir); + return ops.indexOf(buffer, val, byteOffset, dir, end); } if (isUint8Array(val)) { const encodingVal = (ops === undefined ? encodingsMap.utf8 : ops.encodingVal); - return indexOfBuffer(buffer, val, byteOffset, encodingVal, dir); + return indexOfBuffer(buffer, val, byteOffset, encodingVal, dir, end); } throw new ERR_INVALID_ARG_TYPE( @@ -1076,16 +1081,34 @@ function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) { ); } -Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) { - return bidirectionalIndexOf(this, val, byteOffset, encoding, true); +Buffer.prototype.indexOf = function indexOf(val, offset, end, encoding) { + if (typeof end === 'string') { + encoding = end; + end = this.length; + } else if (end === undefined) { + end = this.length; + } + return bidirectionalIndexOf(this, val, offset, end, encoding, true); }; -Buffer.prototype.lastIndexOf = function lastIndexOf(val, byteOffset, encoding) { - return bidirectionalIndexOf(this, val, byteOffset, encoding, false); +Buffer.prototype.lastIndexOf = function lastIndexOf(val, offset, end, encoding) { + if (typeof end === 'string') { + encoding = end; + end = this.length; + } else if (end === undefined) { + end = this.length; + } + return bidirectionalIndexOf(this, val, offset, end, encoding, false); }; -Buffer.prototype.includes = function includes(val, byteOffset, encoding) { - return bidirectionalIndexOf(this, val, 
byteOffset, encoding, true) !== -1; +Buffer.prototype.includes = function includes(val, offset, end, encoding) { + if (typeof end === 'string') { + encoding = end; + end = this.length; + } else if (end === undefined) { + end = this.length; + } + return bidirectionalIndexOf(this, val, offset, end, encoding, true) !== -1; }; // Usage: diff --git a/src/node_buffer.cc b/src/node_buffer.cc index acf4aff37c5a9c..e0a6d6cd2f4ce7 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -962,6 +962,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { CHECK(args[2]->IsNumber()); CHECK(args[3]->IsInt32()); CHECK(args[4]->IsBoolean()); + CHECK(args[5]->IsNumber()); enum encoding enc = static_cast(args[3].As()->Value()); @@ -971,6 +972,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { Local needle = args[1].As(); int64_t offset_i64 = args[2].As()->Value(); bool is_forward = args[4]->IsTrue(); + int64_t end_i64 = args[5].As()->Value(); const char* haystack = buffer.data(); // Round down to the nearest multiple of 2 in case of UCS2. @@ -980,6 +982,11 @@ void IndexOfString(const FunctionCallbackInfo& args) { size_t needle_length; if (!StringBytes::Size(isolate, needle, enc).To(&needle_length)) return; + // search_end is the exclusive upper bound of the search range. + size_t search_end = static_cast( + std::min(end_i64, static_cast(haystack_length))); + if (enc == UCS2) search_end &= ~static_cast(1); + int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, needle_length, @@ -999,17 +1006,24 @@ void IndexOfString(const FunctionCallbackInfo& args) { return args.GetReturnValue().Set(-1); } size_t offset = static_cast(opt_offset); + // For backward search, clamp start to within the search range. 
+ if (!is_forward && offset >= search_end) { + if (search_end == 0) return args.GetReturnValue().Set(-1); + offset = search_end - 1; + } else if (is_forward && offset >= search_end) { + return args.GetReturnValue().Set(-1); + } CHECK_LT(offset, haystack_length); - if ((is_forward && needle_length + offset > haystack_length) || - needle_length > haystack_length) { + if ((is_forward && needle_length + offset > search_end) || + needle_length > search_end) { return args.GetReturnValue().Set(-1); } - size_t result = haystack_length; + size_t result = search_end; if (enc == UCS2) { TwoByteValue needle_value(isolate, needle); - if (haystack_length < 2 || needle_value.length() < 1) { + if (search_end < 2 || needle_value.length() < 1) { return args.GetReturnValue().Set(-1); } @@ -1023,14 +1037,14 @@ void IndexOfString(const FunctionCallbackInfo& args) { return args.GetReturnValue().Set(-1); result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, + search_end / 2, decoded_string, decoder.size() / 2, offset / 2, is_forward); } else { result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, + search_end / 2, needle_value.out(), needle_value.length(), offset / 2, @@ -1045,7 +1059,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { result = nbytes::SearchString( reinterpret_cast(haystack), - haystack_length, + search_end, reinterpret_cast(needle_value.out()), needle_length, offset, @@ -1059,15 +1073,15 @@ void IndexOfString(const FunctionCallbackInfo& args) { enc); result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length, + search_end, needle_data.out(), needle_length, offset, is_forward); } - args.GetReturnValue().Set( - result == haystack_length ? -1 : static_cast(result)); + args.GetReturnValue().Set(result >= search_end ? 
-1 + : static_cast(result)); } void IndexOfBuffer(const FunctionCallbackInfo& args) { @@ -1075,6 +1089,7 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { CHECK(args[2]->IsNumber()); CHECK(args[3]->IsInt32()); CHECK(args[4]->IsBoolean()); + CHECK(args[5]->IsNumber()); enum encoding enc = static_cast(args[3].As()->Value()); @@ -1085,12 +1100,18 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { ArrayBufferViewContents needle_contents(args[1]); int64_t offset_i64 = args[2].As()->Value(); bool is_forward = args[4]->IsTrue(); + int64_t end_i64 = args[5].As()->Value(); const char* haystack = haystack_contents.data(); const size_t haystack_length = haystack_contents.length(); const char* needle = needle_contents.data(); const size_t needle_length = needle_contents.length(); + // search_end is the exclusive upper bound of the search range. + size_t search_end = static_cast( + std::min(end_i64, static_cast(haystack_length))); + if (enc == UCS2) search_end &= ~static_cast(1); + int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, needle_length, @@ -1110,20 +1131,27 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { return args.GetReturnValue().Set(-1); } size_t offset = static_cast(opt_offset); + // For backward search, clamp start to within the search range. 
+ if (!is_forward && offset >= search_end) { + if (search_end == 0) return args.GetReturnValue().Set(-1); + offset = search_end - 1; + } else if (is_forward && offset >= search_end) { + return args.GetReturnValue().Set(-1); + } CHECK_LT(offset, haystack_length); - if ((is_forward && needle_length + offset > haystack_length) || - needle_length > haystack_length) { + if ((is_forward && needle_length + offset > search_end) || + needle_length > search_end) { return args.GetReturnValue().Set(-1); } - size_t result = haystack_length; + size_t result = search_end; if (enc == UCS2) { - if (haystack_length < 2 || needle_length < 2) { + if (search_end < 2 || needle_length < 2) { return args.GetReturnValue().Set(-1); } result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length / 2, + search_end / 2, reinterpret_cast(needle), needle_length / 2, offset / 2, @@ -1131,20 +1159,21 @@ void IndexOfBuffer(const FunctionCallbackInfo& args) { result *= 2; } else { result = nbytes::SearchString(reinterpret_cast(haystack), - haystack_length, + search_end, reinterpret_cast(needle), needle_length, offset, is_forward); } - args.GetReturnValue().Set( - result == haystack_length ? -1 : static_cast(result)); + args.GetReturnValue().Set(result >= search_end ? -1 + : static_cast(result)); } int32_t IndexOfNumberImpl(Local buffer_obj, const uint32_t needle, const int64_t offset_i64, + const int64_t end_i64, const bool is_forward) { ArrayBufferViewContents buffer(buffer_obj); const uint8_t* buffer_data = buffer.data(); @@ -1154,13 +1183,18 @@ int32_t IndexOfNumberImpl(Local buffer_obj, return -1; } size_t offset = static_cast(opt_offset); - CHECK_LT(offset, buffer_length); + // search_end is the exclusive upper bound of the search range. 
+ size_t search_end = static_cast( + std::min(end_i64, static_cast(buffer_length))); const void* ptr; if (is_forward) { - ptr = memchr(buffer_data + offset, needle, buffer_length - offset); + if (offset >= search_end) return -1; + ptr = memchr(buffer_data + offset, needle, search_end - offset); } else { - ptr = nbytes::stringsearch::MemrchrFill(buffer_data, needle, offset + 1); + size_t backward_end = std::min(offset + 1, search_end); + if (backward_end == 0) return -1; + ptr = nbytes::stringsearch::MemrchrFill(buffer_data, needle, backward_end); } const uint8_t* ptr_uint8 = static_cast(ptr); return ptr != nullptr ? static_cast(ptr_uint8 - buffer_data) : -1; @@ -1170,6 +1204,7 @@ void SlowIndexOfNumber(const FunctionCallbackInfo& args) { CHECK(args[1]->IsUint32()); CHECK(args[2]->IsNumber()); CHECK(args[3]->IsBoolean()); + CHECK(args[4]->IsNumber()); THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); @@ -1177,20 +1212,22 @@ void SlowIndexOfNumber(const FunctionCallbackInfo& args) { uint32_t needle = args[1].As()->Value(); int64_t offset_i64 = args[2].As()->Value(); bool is_forward = args[3]->IsTrue(); + int64_t end_i64 = args[4].As()->Value(); args.GetReturnValue().Set( - IndexOfNumberImpl(buffer_obj, needle, offset_i64, is_forward)); + IndexOfNumberImpl(buffer_obj, needle, offset_i64, end_i64, is_forward)); } int32_t FastIndexOfNumber(Local, Local buffer_obj, uint32_t needle, int64_t offset_i64, + int64_t end_i64, bool is_forward, // NOLINTNEXTLINE(runtime/references) FastApiCallbackOptions& options) { HandleScope scope(options.isolate); - return IndexOfNumberImpl(buffer_obj, needle, offset_i64, is_forward); + return IndexOfNumberImpl(buffer_obj, needle, offset_i64, end_i64, is_forward); } static CFunction fast_index_of_number(CFunction::Make(FastIndexOfNumber)); diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 37665b4e4092c2..21fcd78477ab06 100644 --- a/test/parallel/test-buffer-indexof.js +++ 
b/test/parallel/test-buffer-indexof.js @@ -633,3 +633,61 @@ assert.strictEqual(reallyLong.lastIndexOf(pattern), 0); 'Received an instance of lastIndexOf' }); } + +{ + const buf = Buffer.from('abcabc'); + + assert.strictEqual(buf.indexOf('c', 0, 3), 2); + assert.strictEqual(buf.indexOf('c', 0, 2), -1); + assert.strictEqual(buf.indexOf('a', 0, 1), 0); + assert.strictEqual(buf.indexOf('a', 0, 0), -1); + assert.strictEqual(buf.indexOf('abc', 0, 3), 0); + assert.strictEqual(buf.indexOf('abc', 0, 2), -1); + + assert.strictEqual(buf.indexOf('a', 2, 5), 3); + assert.strictEqual(buf.indexOf('a', 2, 3), -1); + assert.strictEqual(buf.indexOf('bc', 1, 4), 1); + assert.strictEqual(buf.indexOf('bc', 1, 3), 1); + + assert.strictEqual(buf.indexOf(Buffer.from('bc'), 0, 3), 1); + assert.strictEqual(buf.indexOf(Buffer.from('bc'), 0, 2), -1); + assert.strictEqual(buf.indexOf(new Uint8Array([0x61]), 0, 4), 0); + + assert.strictEqual(buf.indexOf(0x61, 0, 3), 0); + assert.strictEqual(buf.indexOf(0x61, 0, 1), 0); + assert.strictEqual(buf.indexOf(0x61, 1, 4), 3); + assert.strictEqual(buf.indexOf(0x61, 1, 3), -1); + assert.strictEqual(buf.indexOf(0x63, 0, 2), -1); + + assert.strictEqual(buf.indexOf('a', 0, 'utf8'), 0); + assert.strictEqual(buf.indexOf('abc', 0, 'utf8'), 0); + + assert.strictEqual(buf.indexOf('c'), 2); + assert.strictEqual(buf.indexOf('c', 3), 5); + + const latin1buf = Buffer.from('abcabc', 'latin1'); + assert.strictEqual(latin1buf.indexOf('c', 0, 3, 'latin1'), 2); + assert.strictEqual(latin1buf.indexOf('c', 0, 2, 'latin1'), -1); + + assert.strictEqual(buf.lastIndexOf('a', 5, 4), 3); + assert.strictEqual(buf.lastIndexOf('a', 5, 3), 0); + assert.strictEqual(buf.lastIndexOf('c', 5, 3), 2); + assert.strictEqual(buf.lastIndexOf('c', 5, buf.length), 5); + + assert.strictEqual(buf.lastIndexOf(0x61, 5, 4), 3); + assert.strictEqual(buf.lastIndexOf(0x61, 5, 3), 0); + + assert.strictEqual(buf.lastIndexOf('a', 5, 'utf8'), 3); + + assert.strictEqual(buf.lastIndexOf('a'), 3); + 
assert.strictEqual(buf.lastIndexOf('a', 2), 0); + + assert.strictEqual(buf.includes('c', 0, 3), true); + assert.strictEqual(buf.includes('c', 0, 2), false); + assert.strictEqual(buf.includes('abc', 0, 3), true); + assert.strictEqual(buf.includes('abc', 0, 2), false); + + assert.strictEqual(buf.includes('a', 0, 'utf8'), true); + + assert.strictEqual(buf.includes('c'), true); +} diff --git a/typings/internalBinding/buffer.d.ts b/typings/internalBinding/buffer.d.ts index 62d719eadfbaf7..a384fb016128f8 100644 --- a/typings/internalBinding/buffer.d.ts +++ b/typings/internalBinding/buffer.d.ts @@ -9,9 +9,9 @@ export interface BufferBinding { compare(a: ArrayBufferView, b: ArrayBufferView): number; compareOffset(source: ArrayBufferView, target: ArrayBufferView, targetStart?: number, sourceStart?: number, targetEnd?: number, sourceEnd?: number): number; fill(buf: ArrayBufferView, val: any, start?: number, end?: number, encoding?: number): -1 | -2 | void; - indexOfBuffer(haystack: ArrayBufferView, needle: ArrayBufferView, offset?: number, encoding?: number, isForward?: boolean): number; - indexOfNumber(buf: ArrayBufferView, needle: number, offset?: number, isForward?: boolean): number; - indexOfString(buf: ArrayBufferView, needle: string, offset?: number, encoding?: number, isForward?: boolean): number; + indexOfBuffer(haystack: ArrayBufferView, needle: ArrayBufferView, offset?: number, encoding?: number, isForward?: boolean, end?: number): number; + indexOfNumber(buf: ArrayBufferView, needle: number, offset?: number, isForward?: boolean, end?: number): number; + indexOfString(buf: ArrayBufferView, needle: string, offset?: number, encoding?: number, isForward?: boolean, end?: number): number; copyArrayBuffer(destination: ArrayBuffer | SharedArrayBuffer, destinationOffset: number, source: ArrayBuffer | SharedArrayBuffer, sourceOffset: number, bytesToCopy: number): void;