From ad1a165e0660903b8be5df2159675a5b99a1bd81 Mon Sep 17 00:00:00 2001 From: sjhddh <151469562+sjhddh@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:45:15 +0200 Subject: [PATCH 1/3] fix: realign FFI buffers before validation in from_ffi (#10034) `from_ffi` / `from_ffi_and_data_type` realigned protocol-legal but under-aligned C Data Interface buffers (e.g. 8-byte-aligned Decimal128 from a JVM producer) by calling `align_buffers()` after `ImportedArrowArray::consume`. Under `force_validate` that realignment was unreachable: `consume` built `ArrayData` via `new_unchecked` -> `ArrayDataBuilder::build`, which validates whenever `force_validate` is set and rejected the misaligned buffer before the outer realignment ran. Build the imported `ArrayData` via `ArrayDataBuilder` with `align_buffers(true).skip_validation(true)` so buffers are realigned before validation, which is correct under `force_validate` (it still validates after realignment) and keeps skipping redundant FFI validation otherwise. Drop the now-dead outer `align_buffers()` calls. Rework and un-gate `test_decimal128_under_aligned_round_trip` so it runs under `force_validate`: the old test constructed its under-aligned input as a Decimal128 `ArrayData` via `build_unchecked`, which itself validates under `force_validate`; the rework exports the bytes as a UInt8 array and re-imports them as Decimal128, asserting the imported buffer is realigned. Also document that `ArrayData::align_buffers` must run before validation on import boundaries. Co-Authored-By: Claude Opus 4 Signed-off-by: sjhddh <151469562+sjhddh@users.noreply.github.com> --- arrow-array/src/ffi.rs | 98 +++++++++++++++++++++++------------------- arrow-data/src/data.rs | 5 +++ 2 files changed, 58 insertions(+), 45 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 110dfc9855bb..affb60af9f04 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -105,7 +105,7 @@ use std::{mem::size_of, ptr::NonNull, sync::Arc}; use arrow_buffer::{Buffer, MutableBuffer, bit_util}; pub use arrow_data::ffi::FFI_ArrowArray; -use arrow_data::{ArrayData, layout}; +use arrow_data::{ArrayData, ArrayDataBuilder, layout}; pub use arrow_schema::ffi::FFI_ArrowSchema; use arrow_schema::{ArrowError, DataType, UnionMode}; @@ -287,14 +287,9 @@ pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Resul data_type: dt, owner: &array, }; - let mut data = tmp.consume()?; - // arrow-rs has stricter alignment requirements than the C Data Interface spec; - // a no-op when buffers are already aligned. Unreachable under - // `cfg(feature = "force_validate")`; tracked in #10034. - // See https://github.com/apache/arrow/issues/43552 and - // https://github.com/apache/arrow-rs/issues/10028 for context. - data.align_buffers(); - Ok(data) + // `consume` realigns buffers before validating (see #10034), so no + // separate `align_buffers()` is needed here. + tmp.consume() } /// Import [ArrayData] from the C Data Interface @@ -312,14 +307,9 @@ pub unsafe fn from_ffi_and_data_type( data_type, owner: &array, }; - let mut data = tmp.consume()?; - // arrow-rs has stricter alignment requirements than the C Data Interface spec; - // a no-op when buffers are already aligned. Unreachable under - // `cfg(feature = "force_validate")`; tracked in #10034. - // See https://github.com/apache/arrow/issues/43552 and - // https://github.com/apache/arrow-rs/issues/10028 for context. - data.align_buffers(); - Ok(data) + // `consume` realigns buffers before validating (see #10034), so no + // separate `align_buffers()` is needed here. + tmp.consume() } #[derive(Debug)] @@ -356,18 +346,27 @@ impl ImportedArrowArray<'_> { child_data.push(d.consume()?); } - // Should FFI be checking validity? - Ok(unsafe { - ArrayData::new_unchecked( - self.data_type, - len, - null_count, - null_bit_buffer, - offset, - buffers, - child_data, - ) - }) + // 8-byte alignment is legal over the C Data Interface but violates + // arrow-rs's stricter `ArrayData` alignment invariant, so realign the + // imported buffers *before* validating them. `align_buffers(true)` + // realigns and then `build` validates (or skips validation on the FFI + // hot path, except under `force_validate` which always validates). This + // ordering is what makes import work under `force_validate`; see #10034. + let mut builder = ArrayDataBuilder::new(self.data_type) + .len(len) + .offset(offset) + .null_bit_buffer(null_bit_buffer) + .buffers(buffers) + .child_data(child_data) + .align_buffers(true); + if let Some(null_count) = null_count { + builder = builder.null_count(null_count); + } + // SAFETY: `skip_validation` only skips the redundant FFI validation on + // the hot path; `align_buffers(true)` above guarantees the buffers meet + // arrow-rs's alignment invariant, and under `force_validate` `build` + // still runs full validation after realignment. + unsafe { builder.skip_validation(true) }.build() } fn consume_children(&self) -> Result> { @@ -688,29 +687,38 @@ mod tests_to_then_from_ffi { // case with nulls is tested in the docs, through the example on this module. #[test] - #[cfg(not(feature = "force_validate"))] fn test_decimal128_under_aligned_round_trip() -> Result<()> { - // Construct an 8-aligned-but-not-16-aligned i128 data buffer to model - // an FFI producer that only guarantees the C Data Interface's - // recommended 8-byte alignment (e.g. arrow-java). + // Model an FFI producer that only guarantees the C Data Interface's + // recommended 8-byte alignment (e.g. arrow-java): an i128 data buffer + // that is 8-aligned but not 16-aligned. let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]); let under_aligned = aligned.slice(8); assert_eq!(under_aligned.as_ptr().align_offset(8), 0); assert_ne!(under_aligned.as_ptr().align_offset(16), 0); - // SAFETY: buffer is large enough for 2 i128 elements; misaligned - // input is the condition under test. - let data = unsafe { - ArrayData::builder(DataType::Decimal128(10, 2)) - .len(2) - .add_buffer(under_aligned) - .build_unchecked() - }; - - let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap(); - let array = FFI_ArrowArray::new(&data); + // Export the under-aligned bytes as a `UInt8` array (alignment 1, so it + // is valid Arrow data even under `force_validate`), then re-import them + // as `Decimal128`. This reproduces an under-aligned `Decimal128` buffer + // arriving over the C Data Interface without first having to construct + // an (invalid) under-aligned `Decimal128` `ArrayData` on the producer + // side, which `force_validate` would reject before export. See #10034. + let producer = ArrayData::builder(DataType::UInt8) + .len(under_aligned.len()) + .add_buffer(under_aligned) + .build()?; - let imported = unsafe { from_ffi(array, &schema) }?; + let mut array = FFI_ArrowArray::new(&producer); + // Re-describe the exported 32 data bytes as 2 `Decimal128` elements. + // The data pointer (`buffers[1]`) is unchanged and still 8-aligned. + array.length = 2; + array.null_count = 0; + + // SAFETY: the exported buffer holds 32 bytes = 2 little-endian i128 + // values; reinterpreting it as `Decimal128(10, 2)` of length 2 matches. + let imported = unsafe { from_ffi_and_data_type(array, DataType::Decimal128(10, 2)) }?; + // Import must realign the under-aligned buffer to satisfy arrow-rs's + // 16-byte `i128` alignment invariant, regardless of `force_validate`. + assert_eq!(imported.buffers()[0].as_ptr().align_offset(16), 0); let array = Decimal128Array::from(imported); // The little-endian byte layout of [0i128, 1, 2] sliced 8 bytes in diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 0ce98aa090df..514906fb89dc 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -815,6 +815,11 @@ impl ArrayData { /// This can be useful for when interacting with data sent over IPC or FFI, that may /// not meet the minimum alignment requirements /// + /// On an import boundary this must be called *before* validation: protocol-legal + /// but under-aligned input (e.g. 8-byte-aligned buffers over the C Data Interface) + /// would otherwise be rejected as an invariant violation. Prefer + /// [`ArrayDataBuilder::align_buffers`], which realigns before `build` validates. + /// /// This also aligns buffers of children data pub fn align_buffers(&mut self) { let layout = layout(&self.data_type); From 46840ddfbdee344e6d1e9b03d71da0f7fd82f6ab Mon Sep 17 00:00:00 2001 From: sjhddh <151469562+sjhddh@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:58:30 +0200 Subject: [PATCH 2/3] test: make from_ffi force_validate regression CI-reachable The inline arrow-array regression test for #10034 could not catch the bug in any standard CI invocation: arrow-array's force_validate feature is empty and does not forward to arrow-data/force_validate, where the validation gate that rejects misaligned buffers actually lives. So the inline test passed even with the fix reverted and guarded nothing. Move the authoritative regression guard into a new integration test, arrow/tests/ffi_from_ffi.rs. The arrow umbrella crate's force_validate feature forwards to arrow-data/force_validate, and arrow.yml runs `cargo test -p arrow --features=force_validate,...,ffi`, so the test now genuinely exercises the realign-before-validate path: it FAILS on unfixed `consume` with the Misaligned-buffers InvalidArgumentError and PASSES with the fix. The inline arrow-array test is kept as functional round-trip coverage, with a comment noting it is not the guard. Co-Authored-By: Claude Opus 4 Signed-off-by: sjhddh <151469562+sjhddh@users.noreply.github.com> --- arrow-array/src/ffi.rs | 9 ++++ arrow/tests/ffi_from_ffi.rs | 94 +++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 arrow/tests/ffi_from_ffi.rs diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index affb60af9f04..34d3e3b210ec 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -686,6 +686,15 @@ mod tests_to_then_from_ffi { } // case with nulls is tested in the docs, through the example on this module. + // Functional round-trip coverage of importing an under-aligned `Decimal128` + // buffer over the C Data Interface. NOTE: this is *not* the `force_validate` + // regression guard for #10034 — `arrow-array`'s `force_validate` feature is + // empty and does not forward to `arrow-data/force_validate`, so under + // `force_validate` this test does not actually reach `arrow-data`'s + // validation gate, and under default features the import succeeds even with + // an unfixed `consume` (the buffer still ends up realigned). The true + // CI-reachable regression guard lives in `arrow/tests/ffi_from_ffi.rs`, + // which runs under `cargo test -p arrow --features=force_validate,...,ffi`. #[test] fn test_decimal128_under_aligned_round_trip() -> Result<()> { // Model an FFI producer that only guarantees the C Data Interface's diff --git a/arrow/tests/ffi_from_ffi.rs b/arrow/tests/ffi_from_ffi.rs new file mode 100644 index 000000000000..0641781552f4 --- /dev/null +++ b/arrow/tests/ffi_from_ffi.rs @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for the C Data Interface import path (`from_ffi`). +//! +//! This file lives in the `arrow` umbrella crate so that the `arrow` +//! crate's `force_validate` feature (which forwards to +//! `arrow-data/force_validate`) actually activates `arrow-data`'s +//! validation gate when the test runs. The `arrow-array` crate's own +//! `force_validate` feature is empty and does NOT forward to +//! `arrow-data`, so an inline `arrow-array` test cannot exercise the +//! realign-before-validate path under any standard CI invocation. CI runs +//! `cargo test -p arrow --features=force_validate,...,ffi` (arrow.yml), which +//! does activate the gate, so this is a CI-reachable regression guard for +//! . + +#![cfg(feature = "ffi")] + +use arrow_array::Decimal128Array; +use arrow_buffer::Buffer; +use arrow_data::ArrayData; +use arrow_data::ffi::FFI_ArrowArray; +use arrow_schema::{ArrowError, DataType}; + +use arrow::ffi::from_ffi_and_data_type; + +/// Regression test for #10034: `from_ffi` must realign under-aligned but +/// protocol-legal C Data Interface buffers *before* validating them. +/// +/// Under `force_validate` (active here via `arrow/force_validate -> +/// arrow-data/force_validate`), `ImportedArrowArray::consume` builds the +/// `ArrayData` via `ArrayDataBuilder::build`, which validates whenever +/// `force_validate` is set. If `consume` does not realign first (the pre-fix +/// behavior of `ArrayData::new_unchecked`), `build` rejects the +/// 8-byte-aligned (not 16-byte-aligned) `Decimal128` buffer with +/// `InvalidArgumentError("Misaligned buffers[0] ...")` before the import can +/// realign it. With the fix, `consume` realigns first and the import +/// succeeds in every feature configuration. +#[test] +fn test_decimal128_under_aligned_round_trip_force_validate() -> Result<(), ArrowError> { + // Model an FFI producer that only guarantees the C Data Interface's + // recommended 8-byte alignment (e.g. arrow-java): an i128 data buffer + // that is 8-aligned but not 16-aligned. + let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]); + let under_aligned = aligned.slice(8); + assert_eq!(under_aligned.as_ptr().align_offset(8), 0); + assert_ne!(under_aligned.as_ptr().align_offset(16), 0); + + // Export the under-aligned bytes as a `UInt8` array (alignment 1, so it is + // valid Arrow data even under `force_validate`), then re-import them as + // `Decimal128`. This reproduces an under-aligned `Decimal128` buffer + // arriving over the C Data Interface without having to construct an + // (invalid) under-aligned `Decimal128` `ArrayData` on the producer side, + // which `force_validate` would reject before export. + let producer = ArrayData::builder(DataType::UInt8) + .len(under_aligned.len()) + .add_buffer(under_aligned) + .build()?; + + let mut array = FFI_ArrowArray::new(&producer); + // Re-describe the exported 32 data bytes as 2 `Decimal128` elements. The + // data pointer (`buffers[1]`) is unchanged and still only 8-aligned. + array.length = 2; + array.null_count = 0; + + // SAFETY: the exported buffer holds 32 bytes = 2 little-endian i128 values; + // reinterpreting it as `Decimal128(10, 2)` of length 2 matches. + let imported = unsafe { from_ffi_and_data_type(array, DataType::Decimal128(10, 2)) }?; + // Import must realign the under-aligned buffer to satisfy arrow-rs's + // 16-byte `i128` alignment invariant, regardless of `force_validate`. + assert_eq!(imported.buffers()[0].as_ptr().align_offset(16), 0); + let array = Decimal128Array::from(imported); + + // The little-endian byte layout of [0i128, 1, 2] sliced 8 bytes in yields + // elements `1 << 64` and `2 << 64`. + assert_eq!(array.len(), 2); + assert_eq!(array.value(0), 1_i128 << 64); + assert_eq!(array.value(1), 2_i128 << 64); + Ok(()) +} From 4d1ebf26936c8c495049ae17a3e71068975df64a Mon Sep 17 00:00:00 2001 From: sjhddh <151469562+sjhddh@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:47:11 +0200 Subject: [PATCH 3/3] docs: trim verbose inline comments per review feedback Drop PR-narration and repeated issue-number references from the FFI import comments, collapsing them to concise notes on the realign-before-validate invariant and SAFETY conditions. --- arrow-array/src/ffi.rs | 64 +++++++++++++++---------------------- arrow-data/src/data.rs | 8 ++--- arrow/tests/ffi_from_ffi.rs | 57 ++++++++++++--------------------- 3 files changed, 50 insertions(+), 79 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 34d3e3b210ec..d7bc16de6287 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -287,8 +287,8 @@ pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Resul data_type: dt, owner: &array, }; - // `consume` realigns buffers before validating (see #10034), so no - // separate `align_buffers()` is needed here. + // `consume` realigns buffers before validating, so no separate + // `align_buffers()` is needed here. tmp.consume() } @@ -307,8 +307,8 @@ pub unsafe fn from_ffi_and_data_type( data_type, owner: &array, }; - // `consume` realigns buffers before validating (see #10034), so no - // separate `align_buffers()` is needed here. + // `consume` realigns buffers before validating, so no separate + // `align_buffers()` is needed here. tmp.consume() } @@ -346,12 +346,10 @@ impl ImportedArrowArray<'_> { child_data.push(d.consume()?); } - // 8-byte alignment is legal over the C Data Interface but violates - // arrow-rs's stricter `ArrayData` alignment invariant, so realign the - // imported buffers *before* validating them. `align_buffers(true)` - // realigns and then `build` validates (or skips validation on the FFI - // hot path, except under `force_validate` which always validates). This - // ordering is what makes import work under `force_validate`; see #10034. + // The C Data Interface permits weaker buffer alignment than arrow-rs's + // `ArrayData` invariant, so realign the imported buffers *before* + // validating them. `align_buffers(true)` realigns, then `build` + // validates the realigned buffers under `force_validate`. let mut builder = ArrayDataBuilder::new(self.data_type) .len(len) .offset(offset) @@ -362,10 +360,8 @@ impl ImportedArrowArray<'_> { if let Some(null_count) = null_count { builder = builder.null_count(null_count); } - // SAFETY: `skip_validation` only skips the redundant FFI validation on - // the hot path; `align_buffers(true)` above guarantees the buffers meet - // arrow-rs's alignment invariant, and under `force_validate` `build` - // still runs full validation after realignment. + // SAFETY: `align_buffers(true)` above upholds the alignment invariant + // that `skip_validation` would otherwise leave unchecked. unsafe { builder.skip_validation(true) }.build() } @@ -686,47 +682,39 @@ mod tests_to_then_from_ffi { } // case with nulls is tested in the docs, through the example on this module. - // Functional round-trip coverage of importing an under-aligned `Decimal128` - // buffer over the C Data Interface. NOTE: this is *not* the `force_validate` - // regression guard for #10034 — `arrow-array`'s `force_validate` feature is - // empty and does not forward to `arrow-data/force_validate`, so under - // `force_validate` this test does not actually reach `arrow-data`'s - // validation gate, and under default features the import succeeds even with - // an unfixed `consume` (the buffer still ends up realigned). The true - // CI-reachable regression guard lives in `arrow/tests/ffi_from_ffi.rs`, - // which runs under `cargo test -p arrow --features=force_validate,...,ffi`. + // Round-trip coverage for importing an under-aligned `Decimal128` buffer. + // This is not the `force_validate` regression guard: `arrow-array`'s + // `force_validate` feature does not forward to `arrow-data`, so it cannot + // reach the validation gate. That guard lives in + // `arrow/tests/ffi_from_ffi.rs`. #[test] fn test_decimal128_under_aligned_round_trip() -> Result<()> { - // Model an FFI producer that only guarantees the C Data Interface's - // recommended 8-byte alignment (e.g. arrow-java): an i128 data buffer - // that is 8-aligned but not 16-aligned. + // An i128 buffer that is 8-aligned but not 16-aligned, modeling an FFI + // producer that only guarantees the C Data Interface's recommended + // 8-byte alignment (e.g. arrow-java). let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]); let under_aligned = aligned.slice(8); assert_eq!(under_aligned.as_ptr().align_offset(8), 0); assert_ne!(under_aligned.as_ptr().align_offset(16), 0); - // Export the under-aligned bytes as a `UInt8` array (alignment 1, so it - // is valid Arrow data even under `force_validate`), then re-import them - // as `Decimal128`. This reproduces an under-aligned `Decimal128` buffer - // arriving over the C Data Interface without first having to construct - // an (invalid) under-aligned `Decimal128` `ArrayData` on the producer - // side, which `force_validate` would reject before export. See #10034. + // Export the bytes as a `UInt8` array (alignment 1, valid even under + // `force_validate`) then re-import as `Decimal128`, reproducing an + // under-aligned buffer arriving over the C Data Interface. let producer = ArrayData::builder(DataType::UInt8) .len(under_aligned.len()) .add_buffer(under_aligned) .build()?; let mut array = FFI_ArrowArray::new(&producer); - // Re-describe the exported 32 data bytes as 2 `Decimal128` elements. - // The data pointer (`buffers[1]`) is unchanged and still 8-aligned. + // Re-describe the 32 data bytes as 2 `Decimal128` elements; the data + // pointer is unchanged and still only 8-aligned. array.length = 2; array.null_count = 0; - // SAFETY: the exported buffer holds 32 bytes = 2 little-endian i128 - // values; reinterpreting it as `Decimal128(10, 2)` of length 2 matches. + // SAFETY: 32 bytes = 2 little-endian i128 values, matching + // `Decimal128(10, 2)` of length 2. let imported = unsafe { from_ffi_and_data_type(array, DataType::Decimal128(10, 2)) }?; - // Import must realign the under-aligned buffer to satisfy arrow-rs's - // 16-byte `i128` alignment invariant, regardless of `force_validate`. + // Import must realign the buffer to arrow-rs's 16-byte `i128` alignment. assert_eq!(imported.buffers()[0].as_ptr().align_offset(16), 0); let array = Decimal128Array::from(imported); diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 514906fb89dc..85a1db0f7249 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -815,10 +815,10 @@ impl ArrayData { /// This can be useful for when interacting with data sent over IPC or FFI, that may /// not meet the minimum alignment requirements /// - /// On an import boundary this must be called *before* validation: protocol-legal - /// but under-aligned input (e.g. 8-byte-aligned buffers over the C Data Interface) - /// would otherwise be rejected as an invariant violation. Prefer - /// [`ArrayDataBuilder::align_buffers`], which realigns before `build` validates. + /// On an import boundary this must run *before* validation, otherwise + /// protocol-legal but under-aligned input is rejected as an invariant + /// violation. Prefer [`ArrayDataBuilder::align_buffers`], which realigns + /// before `build` validates. /// /// This also aligns buffers of children data pub fn align_buffers(&mut self) { diff --git a/arrow/tests/ffi_from_ffi.rs b/arrow/tests/ffi_from_ffi.rs index 0641781552f4..64fdab92a520 100644 --- a/arrow/tests/ffi_from_ffi.rs +++ b/arrow/tests/ffi_from_ffi.rs @@ -17,16 +17,11 @@ //! Integration tests for the C Data Interface import path (`from_ffi`). //! -//! This file lives in the `arrow` umbrella crate so that the `arrow` -//! crate's `force_validate` feature (which forwards to -//! `arrow-data/force_validate`) actually activates `arrow-data`'s -//! validation gate when the test runs. The `arrow-array` crate's own -//! `force_validate` feature is empty and does NOT forward to -//! `arrow-data`, so an inline `arrow-array` test cannot exercise the -//! realign-before-validate path under any standard CI invocation. CI runs -//! `cargo test -p arrow --features=force_validate,...,ffi` (arrow.yml), which -//! does activate the gate, so this is a CI-reachable regression guard for -//! . +//! These live in the `arrow` umbrella crate because its `force_validate` +//! feature forwards to `arrow-data/force_validate`, activating the validation +//! gate that the realign-before-validate path must satisfy. The `arrow-array` +//! crate's own `force_validate` feature does not forward to `arrow-data`, so +//! an inline test there cannot reach that gate. #![cfg(feature = "ffi")] @@ -38,50 +33,38 @@ use arrow_schema::{ArrowError, DataType}; use arrow::ffi::from_ffi_and_data_type; -/// Regression test for #10034: `from_ffi` must realign under-aligned but -/// protocol-legal C Data Interface buffers *before* validating them. -/// -/// Under `force_validate` (active here via `arrow/force_validate -> -/// arrow-data/force_validate`), `ImportedArrowArray::consume` builds the -/// `ArrayData` via `ArrayDataBuilder::build`, which validates whenever -/// `force_validate` is set. If `consume` does not realign first (the pre-fix -/// behavior of `ArrayData::new_unchecked`), `build` rejects the -/// 8-byte-aligned (not 16-byte-aligned) `Decimal128` buffer with -/// `InvalidArgumentError("Misaligned buffers[0] ...")` before the import can -/// realign it. With the fix, `consume` realigns first and the import -/// succeeds in every feature configuration. +/// `from_ffi` must realign under-aligned but protocol-legal C Data Interface +/// buffers *before* validating them. Under `force_validate`, importing an +/// 8-byte-aligned (not 16-byte-aligned) `Decimal128` buffer fails with a +/// "Misaligned buffers" error unless `consume` realigns first. #[test] fn test_decimal128_under_aligned_round_trip_force_validate() -> Result<(), ArrowError> { - // Model an FFI producer that only guarantees the C Data Interface's - // recommended 8-byte alignment (e.g. arrow-java): an i128 data buffer - // that is 8-aligned but not 16-aligned. + // An i128 buffer that is 8-aligned but not 16-aligned, modeling an FFI + // producer that only guarantees the C Data Interface's recommended 8-byte + // alignment (e.g. arrow-java). let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]); let under_aligned = aligned.slice(8); assert_eq!(under_aligned.as_ptr().align_offset(8), 0); assert_ne!(under_aligned.as_ptr().align_offset(16), 0); - // Export the under-aligned bytes as a `UInt8` array (alignment 1, so it is - // valid Arrow data even under `force_validate`), then re-import them as - // `Decimal128`. This reproduces an under-aligned `Decimal128` buffer - // arriving over the C Data Interface without having to construct an - // (invalid) under-aligned `Decimal128` `ArrayData` on the producer side, - // which `force_validate` would reject before export. + // Export the bytes as a `UInt8` array (alignment 1, valid even under + // `force_validate`) then re-import as `Decimal128`, reproducing an + // under-aligned buffer arriving over the C Data Interface. let producer = ArrayData::builder(DataType::UInt8) .len(under_aligned.len()) .add_buffer(under_aligned) .build()?; let mut array = FFI_ArrowArray::new(&producer); - // Re-describe the exported 32 data bytes as 2 `Decimal128` elements. The - // data pointer (`buffers[1]`) is unchanged and still only 8-aligned. + // Re-describe the 32 data bytes as 2 `Decimal128` elements; the data + // pointer is unchanged and still only 8-aligned. array.length = 2; array.null_count = 0; - // SAFETY: the exported buffer holds 32 bytes = 2 little-endian i128 values; - // reinterpreting it as `Decimal128(10, 2)` of length 2 matches. + // SAFETY: 32 bytes = 2 little-endian i128 values, matching + // `Decimal128(10, 2)` of length 2. let imported = unsafe { from_ffi_and_data_type(array, DataType::Decimal128(10, 2)) }?; - // Import must realign the under-aligned buffer to satisfy arrow-rs's - // 16-byte `i128` alignment invariant, regardless of `force_validate`. + // Import must realign the buffer to arrow-rs's 16-byte `i128` alignment. assert_eq!(imported.buffers()[0].as_ptr().align_offset(16), 0); let array = Decimal128Array::from(imported);