Skip to content

Commit 78c7d9d

Browse files
committed
feat: check over overlong UDF names
1 parent ce14fac commit 78c7d9d

File tree

6 files changed

+96
-40
lines changed

6 files changed

+96
-40
lines changed

guests/evil/src/complex/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,41 @@
11
//! Overly complex data emitted by the payload.
22
3+
use arrow::datatypes::DataType;
4+
use datafusion_common::{Result as DataFusionResult, ScalarValue};
5+
use datafusion_expr::{
6+
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
7+
};
8+
9+
pub(crate) mod udf_long_name;
310
pub(crate) mod udfs_duplicate_names;
11+
12+
/// UDF with a name
13+
#[derive(Debug, PartialEq, Eq, Hash)]
14+
struct NamedUdf(String);
15+
16+
impl ScalarUDFImpl for NamedUdf {
17+
fn as_any(&self) -> &dyn std::any::Any {
18+
self
19+
}
20+
21+
fn name(&self) -> &str {
22+
&self.0
23+
}
24+
25+
fn signature(&self) -> &Signature {
26+
static S: Signature = Signature {
27+
type_signature: TypeSignature::Uniform(0, vec![]),
28+
volatility: Volatility::Immutable,
29+
};
30+
31+
&S
32+
}
33+
34+
fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult<DataType> {
35+
Ok(DataType::Null)
36+
}
37+
38+
fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> DataFusionResult<ColumnarValue> {
39+
Ok(ColumnarValue::Scalar(ScalarValue::Null))
40+
}
41+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//! Long UDF names.
2+
use std::sync::Arc;
3+
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
6+
7+
use crate::complex::NamedUdf;
8+
9+
/// Returns our evil UDFs.
10+
///
11+
/// The passed `source` is ignored.
12+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
13+
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
14+
let limit: usize = std::env::var("limit").unwrap().parse().unwrap();
15+
let name = std::iter::repeat_n('x', limit + 1).collect::<String>();
16+
17+
Ok(vec![
18+
// Emit twice. This shouldn't trigger the "duplicate names" detection though because the length MUST be
19+
// checked BEFORE potentially hashing the names.
20+
Arc::new(NamedUdf(name.clone())),
21+
Arc::new(NamedUdf(name)),
22+
])
23+
}
Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,19 @@
11
//! Duplicate UDF names.
22
use std::sync::Arc;
33

4-
use arrow::datatypes::DataType;
5-
use datafusion_common::{Result as DataFusionResult, ScalarValue};
6-
use datafusion_expr::{
7-
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
8-
};
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
96

10-
/// UDF with a name
11-
#[derive(Debug, PartialEq, Eq, Hash)]
12-
struct NamedUdf(&'static str);
13-
14-
impl ScalarUDFImpl for NamedUdf {
15-
fn as_any(&self) -> &dyn std::any::Any {
16-
self
17-
}
18-
19-
fn name(&self) -> &str {
20-
self.0
21-
}
22-
23-
fn signature(&self) -> &Signature {
24-
static S: Signature = Signature {
25-
type_signature: TypeSignature::Uniform(0, vec![]),
26-
volatility: Volatility::Immutable,
27-
};
28-
29-
&S
30-
}
31-
32-
fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult<DataType> {
33-
Ok(DataType::Null)
34-
}
35-
36-
fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> DataFusionResult<ColumnarValue> {
37-
Ok(ColumnarValue::Scalar(ScalarValue::Null))
38-
}
39-
}
7+
use crate::complex::NamedUdf;
408

419
/// Returns our evil UDFs.
4210
///
4311
/// The passed `source` is ignored.
4412
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
4513
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
4614
Ok(vec![
47-
Arc::new(NamedUdf("foo")),
48-
Arc::new(NamedUdf("bar")),
49-
Arc::new(NamedUdf("foo")),
15+
Arc::new(NamedUdf("foo".to_owned())),
16+
Arc::new(NamedUdf("bar".to_owned())),
17+
Arc::new(NamedUdf("foo".to_owned())),
5018
])
5119
}

guests/evil/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ impl Evil {
3535
/// Get evil, multiplexed by env.
3636
fn get() -> Self {
3737
match std::env::var("EVIL").expect("evil specified").as_str() {
38+
"complex::udf_long_name" => Self {
39+
root: Box::new(common::root_empty),
40+
udfs: Box::new(complex::udf_long_name::udfs),
41+
},
3842
"complex::udfs_duplicate_names" => Self {
3943
root: Box::new(common::root_empty),
4044
udfs: Box::new(complex::udfs_duplicate_names::udfs),

host/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use wasmtime_wasi_http::{
4040

4141
use crate::{
4242
bindings::exports::datafusion_udf_wasm::udf::types as wit_types,
43-
conversion::limits::{CheckedInto, TrustedDataLimits},
43+
conversion::limits::{CheckedInto, ComplexityToken, TrustedDataLimits},
4444
error::{DataFusionResultExt, WasmToDataFusionResultExt, WitDataFusionResultExt},
4545
http::{HttpRequestValidator, RejectAllHttpRequests},
4646
limiter::{Limiter, StaticResourceLimits},
@@ -595,6 +595,8 @@ impl WasmScalarUdf {
595595
"call ScalarUdf::name",
596596
Some(&store_guard.data().stderr.contents()),
597597
)?;
598+
ComplexityToken::new(permissions.trusted_data_limits.clone())?
599+
.check_identifier(&name)?;
598600
if !names_seen.insert(name.clone()) {
599601
return Err(DataFusionError::External(
600602
format!("non-unique UDF name: '{name}'").into(),

host/tests/integration_tests/evil/complex.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,25 @@
1-
use crate::integration_tests::evil::test_utils::try_scalar_udfs;
1+
use datafusion_udf_wasm_host::conversion::limits::TrustedDataLimits;
2+
3+
use crate::integration_tests::evil::test_utils::{try_scalar_udfs, try_scalar_udfs_with_env};
4+
5+
#[tokio::test]
6+
async fn test_udf_long_name() {
7+
let err = try_scalar_udfs_with_env(
8+
"complex::udf_long_name",
9+
&[(
10+
"limit",
11+
&TrustedDataLimits::default()
12+
.max_identifier_length
13+
.to_string(),
14+
)],
15+
)
16+
.await
17+
.unwrap_err();
18+
19+
insta::assert_snapshot!(
20+
err,
21+
@"Resources exhausted: identifier length: got=51, limit=50");
22+
}
223

324
#[tokio::test]
425
async fn test_udfs_duplicate_names() {

0 commit comments

Comments
 (0)