Skip to content

Commit 7f0b936

Browse files
gruuyaJanKaul
authored andcommitted
fix: make fake object store url be unique, human readable and always be parsable
1 parent 767b99c commit 7f0b936

File tree

2 files changed

+28
-29
lines changed

2 files changed

+28
-29
lines changed

catalogs/iceberg-file-catalog/src/lib.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -896,10 +896,7 @@ pub mod tests {
896896
.await
897897
.unwrap();
898898

899-
assert_eq!(
900-
std::str::from_utf8(&version_hint).unwrap(),
901-
"s3://warehouse/tpch/lineitem/metadata/v1.metadata.json"
902-
);
899+
assert_eq!(std::str::from_utf8(&version_hint).unwrap(), "1");
903900

904901
let files = object_store.list(None).collect::<Vec<_>>().await;
905902

datafusion_iceberg/src/table.rs

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -335,24 +335,17 @@ impl TableProvider for DataFusionTable {
335335
// Create a fake object store URL. Different table paths should produce fake URLs
336336
// that differ in the host name, because DF's DefaultObjectStoreRegistry only takes
337337
// hostname into account for object store keys
338-
fn fake_object_store_url(table_location_url: &str) -> Option<ObjectStoreUrl> {
339-
let mut u = url::Url::parse(table_location_url).ok()?;
340-
u.set_host(Some(&format!(
341-
"{}-{}",
342-
u.host_str().unwrap_or(""),
343-
// Hex-encode the path to ensure it produces a valid hostname
344-
u.path()
345-
.as_bytes()
346-
.iter()
347-
.map(|b| format!("{b:02x}"))
348-
.collect::<Vec<_>>()
349-
.join("")
350-
)))
351-
.unwrap();
352-
u.set_path("");
353-
u.set_query(None);
354-
u.set_fragment(None);
355-
ObjectStoreUrl::parse(&u).ok()
338+
fn fake_object_store_url(table_location_url: &str) -> ObjectStoreUrl {
339+
// Use quasi url-encoding to escape the characters not allowed in host names, (i.e. for `/` use
340+
// `-2F` instead of `%2F`)
341+
ObjectStoreUrl::parse(format!(
342+
"iceberg-rust://{}",
343+
table_location_url
344+
.replace('-', "-2D")
345+
.replace('/', "-2F")
346+
.replace(':', "-3A")
347+
))
348+
.expect("Invalid object store url.")
356349
}
357350

358351
#[allow(clippy::too_many_arguments)]
@@ -380,8 +373,7 @@ async fn table_scan(
380373
.unwrap_or_else(|| table.current_schema(None).unwrap().clone());
381374

382375
// Create a unique URI for this particular object store
383-
let object_store_url = fake_object_store_url(&table.metadata().location)
384-
.unwrap_or_else(ObjectStoreUrl::local_filesystem);
376+
let object_store_url = fake_object_store_url(&table.metadata().location);
385377
session
386378
.runtime_env()
387379
.register_object_store(object_store_url.as_ref(), table.object_store());
@@ -1158,8 +1150,7 @@ async fn write_parquet_files(
11581150

11591151
let bucket = Bucket::from_path(&metadata.location).map_err(DataFusionIcebergError::from)?;
11601152

1161-
let object_store_url =
1162-
fake_object_store_url(&metadata.location).unwrap_or(ObjectStoreUrl::local_filesystem());
1153+
let object_store_url = fake_object_store_url(&metadata.location);
11631154

11641155
context.runtime_env().register_object_store(
11651156
&object_store_url
@@ -2499,12 +2490,23 @@ mod tests {
24992490
fn test_fake_object_store_url() {
25002491
assert_eq!(
25012492
fake_object_store_url("s3://a"),
2502-
Some(ObjectStoreUrl::parse("s3://a-").unwrap()),
2493+
ObjectStoreUrl::parse("iceberg-rust://s3-3A-2F-2Fa").unwrap(),
25032494
);
25042495
assert_eq!(
25052496
fake_object_store_url("s3://a/b"),
2506-
Some(ObjectStoreUrl::parse("s3://a-2f62").unwrap()),
2497+
ObjectStoreUrl::parse("iceberg-rust://s3-3A-2F-2Fa-2Fb").unwrap(),
2498+
);
2499+
assert_eq!(
2500+
fake_object_store_url("/warehouse/tpch/lineitem"),
2501+
ObjectStoreUrl::parse("iceberg-rust://-2Fwarehouse-2Ftpch-2Flineitem").unwrap()
2502+
);
2503+
assert_ne!(
2504+
fake_object_store_url("s3://a/-/--"),
2505+
fake_object_store_url("s3://a/--/-"),
2506+
);
2507+
assert_ne!(
2508+
fake_object_store_url("s3://a/table-2Fpath"),
2509+
fake_object_store_url("s3://a/table/path"),
25072510
);
2508-
assert_eq!(fake_object_store_url("invalid url"), None);
25092511
}
25102512
}

0 commit comments

Comments
 (0)