Skip to content

Commit ebef68a

Browse files
authored
Merge pull request #219 from influxdata/crepererum/sandbox-udf-edition
feat: sandbox lock-down -- "set of UDFs" edition
2 parents 6081853 + 053b31f commit ebef68a

File tree

8 files changed

+205
-2
lines changed

8 files changed

+205
-2
lines changed

guests/evil/src/complex/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//! Overly complex data emitted by the payload.
2+
3+
use arrow::datatypes::DataType;
4+
use datafusion_common::{Result as DataFusionResult, ScalarValue};
5+
use datafusion_expr::{
6+
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
7+
};
8+
9+
pub(crate) mod udf_long_name;
10+
pub(crate) mod udfs_duplicate_names;
11+
pub(crate) mod udfs_many;
12+
13+
/// UDF with a name
///
/// Minimal scalar-UDF type whose name is exactly the wrapped string.
/// The payload modules in this crate use it to emit UDFs with arbitrary
/// (overly long, duplicated, or numerous) names.
#[derive(Debug, PartialEq, Eq, Hash)]
struct NamedUdf(String);
16+
impl ScalarUDFImpl for NamedUdf {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// The UDF name is whatever string the payload wrapped.
    fn name(&self) -> &str {
        &self.0
    }

    fn signature(&self) -> &Signature {
        // The trait returns a reference, so the signature lives in a `static`.
        // Zero-argument (`Uniform(0, ..)`), immutable volatility.
        static S: Signature = Signature {
            type_signature: TypeSignature::Uniform(0, vec![]),
            volatility: Volatility::Immutable,
        };

        &S
    }

    fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult<DataType> {
        // Always NULL-typed; only the name matters for these payloads.
        Ok(DataType::Null)
    }

    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> DataFusionResult<ColumnarValue> {
        // Always yields a NULL scalar; the UDF never does real work.
        Ok(ColumnarValue::Scalar(ScalarValue::Null))
    }
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//! Long UDF names.
2+
use std::sync::Arc;
3+
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
6+
7+
use crate::complex::NamedUdf;
8+
9+
/// Returns our evil UDFs.
10+
///
11+
/// The passed `source` is ignored.
12+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
13+
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
14+
let limit: usize = std::env::var("limit").unwrap().parse().unwrap();
15+
let name = std::iter::repeat_n('x', limit + 1).collect::<String>();
16+
17+
Ok(vec![
18+
// Emit twice. This shouldn't trigger the "duplicate names" detection though because the length MUST be
19+
// checked BEFORE potentially hashing the names.
20+
Arc::new(NamedUdf(name.clone())),
21+
Arc::new(NamedUdf(name)),
22+
])
23+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//! Duplicate UDF names.
2+
use std::sync::Arc;
3+
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
6+
7+
use crate::complex::NamedUdf;
8+
9+
/// Returns our evil UDFs.
10+
///
11+
/// The passed `source` is ignored.
12+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
13+
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
14+
Ok(vec![
15+
Arc::new(NamedUdf("foo".to_owned())),
16+
Arc::new(NamedUdf("bar".to_owned())),
17+
Arc::new(NamedUdf("foo".to_owned())),
18+
])
19+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//! Create many UDFs.
2+
use std::sync::Arc;
3+
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
6+
7+
use crate::complex::NamedUdf;
8+
9+
/// Returns our evil UDFs.
10+
///
11+
/// The passed `source` is ignored.
12+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
13+
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
14+
let limit: usize = std::env::var("limit").unwrap().parse().unwrap();
15+
16+
Ok((0..=limit)
17+
.map(|i| Arc::new(NamedUdf(i.to_string())) as _)
18+
.collect())
19+
}

guests/evil/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use datafusion_expr::ScalarUDFImpl;
88
use datafusion_udf_wasm_guest::export;
99

1010
mod common;
11+
mod complex;
1112
mod env;
1213
mod fs;
1314
mod net;
@@ -35,6 +36,18 @@ impl Evil {
3536
/// Get evil, multiplexed by env.
3637
fn get() -> Self {
3738
match std::env::var("EVIL").expect("evil specified").as_str() {
39+
"complex::udf_long_name" => Self {
40+
root: Box::new(common::root_empty),
41+
udfs: Box::new(complex::udf_long_name::udfs),
42+
},
43+
"complex::udfs_duplicate_names" => Self {
44+
root: Box::new(common::root_empty),
45+
udfs: Box::new(complex::udfs_duplicate_names::udfs),
46+
},
47+
"complex::udfs_many" => Self {
48+
root: Box::new(common::root_empty),
49+
udfs: Box::new(complex::udfs_many::udfs),
50+
},
3851
"env" => Self {
3952
root: Box::new(common::root_empty),
4053
udfs: Box::new(env::udfs),

host/src/lib.rs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22
//!
33
//!
44
//! [DataFusion]: https://datafusion.apache.org/
5-
use std::{any::Any, collections::BTreeMap, hash::Hash, ops::DerefMut, sync::Arc, time::Duration};
5+
use std::{
6+
any::Any,
7+
collections::{BTreeMap, HashSet},
8+
hash::Hash,
9+
ops::DerefMut,
10+
sync::Arc,
11+
time::Duration,
12+
};
613

714
use ::http::HeaderName;
815
use arrow::datatypes::DataType;
@@ -33,7 +40,7 @@ use wasmtime_wasi_http::{
3340

3441
use crate::{
3542
bindings::exports::datafusion_udf_wasm::udf::types as wit_types,
36-
conversion::limits::{CheckedInto, TrustedDataLimits},
43+
conversion::limits::{CheckedInto, ComplexityToken, TrustedDataLimits},
3744
error::{DataFusionResultExt, WasmToDataFusionResultExt, WitDataFusionResultExt},
3845
http::{HttpRequestValidator, RejectAllHttpRequests},
3946
limiter::{Limiter, StaticResourceLimits},
@@ -267,6 +274,9 @@ pub struct WasmPermissions {
267274
/// Trusted data limits.
268275
trusted_data_limits: TrustedDataLimits,
269276

277+
/// Maximum number of UDFs.
278+
max_udfs: usize,
279+
270280
/// Environment variables.
271281
envs: BTreeMap<String, String>,
272282
}
@@ -293,6 +303,7 @@ impl Default for WasmPermissions {
293303
stderr_bytes: 1024, // 1KB
294304
resource_limits: StaticResourceLimits::default(),
295305
trusted_data_limits: TrustedDataLimits::default(),
306+
max_udfs: 20,
296307
envs: BTreeMap::default(),
297308
}
298309
}
@@ -371,6 +382,19 @@ impl WasmPermissions {
371382
}
372383
}
373384

385+
/// Get the maximum number of UDFs that a payload/guest can produce.
///
/// Defaults to 20; configurable via [`Self::with_max_udfs`].
pub fn max_udfs(&self) -> usize {
    self.max_udfs
}
389+
390+
/// Set the maximum number of UDFs that a payload/guest can produce.
391+
pub fn with_max_udfs(self, limit: usize) -> Self {
392+
Self {
393+
max_udfs: limit,
394+
..self
395+
}
396+
}
397+
374398
/// Add environment variable.
375399
pub fn with_env(mut self, key: String, value: String) -> Self {
376400
self.envs.insert(key, value);
@@ -571,10 +595,18 @@ impl WasmScalarUdf {
571595
)?
572596
.convert_err(permissions.trusted_data_limits.clone())
573597
.context("scalar_udfs")?;
598+
if udf_resources.len() > permissions.max_udfs {
599+
return Err(DataFusionError::ResourcesExhausted(format!(
600+
"guest returned too many UDFs: got={}, limit={}",
601+
udf_resources.len(),
602+
permissions.max_udfs,
603+
)));
604+
}
574605

575606
let store = Arc::new(Mutex::new(store));
576607

577608
let mut udfs = Vec::with_capacity(udf_resources.len());
609+
let mut names_seen = HashSet::with_capacity(udf_resources.len());
578610
for resource in udf_resources {
579611
let mut store_guard = store.lock().await;
580612
let store2: &mut Store<WasmStateImpl> = &mut store_guard;
@@ -587,6 +619,13 @@ impl WasmScalarUdf {
587619
"call ScalarUdf::name",
588620
Some(&store_guard.data().stderr.contents()),
589621
)?;
622+
ComplexityToken::new(permissions.trusted_data_limits.clone())?
623+
.check_identifier(&name)?;
624+
if !names_seen.insert(name.clone()) {
625+
return Err(DataFusionError::External(
626+
format!("non-unique UDF name: '{name}'").into(),
627+
));
628+
}
590629

591630
let store2: &mut Store<WasmStateImpl> = &mut store_guard;
592631
let signature: Signature = bindings
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use datafusion_udf_wasm_host::{WasmPermissions, conversion::limits::TrustedDataLimits};
2+
3+
use crate::integration_tests::evil::test_utils::{try_scalar_udfs, try_scalar_udfs_with_env};
4+
5+
#[tokio::test]
6+
async fn test_udf_long_name() {
7+
let err = try_scalar_udfs_with_env(
8+
"complex::udf_long_name",
9+
&[(
10+
"limit",
11+
&TrustedDataLimits::default()
12+
.max_identifier_length
13+
.to_string(),
14+
)],
15+
)
16+
.await
17+
.unwrap_err();
18+
19+
insta::assert_snapshot!(
20+
err,
21+
@"Resources exhausted: identifier length: got=51, limit=50");
22+
}
23+
24+
#[tokio::test]
25+
async fn test_udfs_duplicate_names() {
26+
let err = try_scalar_udfs("complex::udfs_duplicate_names")
27+
.await
28+
.unwrap_err();
29+
30+
insta::assert_snapshot!(
31+
err,
32+
@"External error: non-unique UDF name: 'foo'");
33+
}
34+
35+
#[tokio::test]
36+
async fn test_udfs_many() {
37+
let err = try_scalar_udfs_with_env(
38+
"complex::udfs_many",
39+
&[("limit", &WasmPermissions::default().max_udfs().to_string())],
40+
)
41+
.await
42+
.unwrap_err();
43+
44+
insta::assert_snapshot!(
45+
err,
46+
@"Resources exhausted: guest returned too many UDFs: got=21, limit=20");
47+
}

host/tests/integration_tests/evil/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod complex;
12
mod env;
23
mod fs;
34
mod net;

0 commit comments

Comments
 (0)