From 6a261768505d6229c494a5bef73fa14f8573ee6d Mon Sep 17 00:00:00 2001 From: geruh Date: Mon, 22 Jun 2026 22:32:44 -0700 Subject: [PATCH 1/3] fix(namespace): allow create_branch bootstrap on managed tables create_table_version resolves a branch via resolve_branch_for_commit: a branch chain accepts a commit when the BranchContents ref already exists or when the chain has no committed versions yet (the create_branch bootstrap, whose first commit precedes its ref). A chain with versions but no ref is a zombie and stays rejected. Adds test_create_branch_on_managed_dataset_succeeds; managed zombie rejection through create_table_version is already covered by test_branch_ops_reject_zombie_branch. Co-authored-by: Claude Opus 4.8 (1M context) --- rust/lance-namespace-impls/src/dir.rs | 70 ++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 681cfa430f2..2c20cdadd54 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1281,6 +1281,47 @@ impl DirectoryNamespace { .uri) } + /// Resolves the branch URI for a `create_table_version` commit. + /// `BranchContents` is the source of truth, so check the ref first: a + /// registered branch commits directly. With no ref, accept the commit only on + /// an empty chain (the `create_branch` bootstrap, whose first commit precedes + /// its ref); reject a chain that already holds committed versions as a zombie. + async fn resolve_branch_for_commit(&self, table_uri: &str, branch: &str) -> Result { + let main = self + .configured_builder(table_uri) + .load() + .await + .map_err(|e| { + lance_core::Error::from(NamespaceError::TableNotFound { + message: format!("table at '{}' not found: {}", table_uri, e), + }) + })?; + let branch_uri = main.branch_location().find_branch(Some(branch))?.uri; + match main.branches().get(branch).await { + Ok(_) => Ok(branch_uri), + Err(lance_core::Error::RefNotFound { .. }) => { + if self.branch_has_committed_versions(&branch_uri).await? { + return Err(NamespaceError::TableNotFound { + message: format!( + "branch '{}' not found for table at '{}'", + branch, table_uri + ), + } + .into()); + } + Ok(branch_uri) + } + Err(e) => Err(e), + } + } + + async fn branch_has_committed_versions(&self, branch_uri: &str) -> Result { + let versions = self + .list_table_versions_from_storage(branch_uri, false, Some(1)) + .await?; + Ok(!versions.is_empty()) + } + fn validate_dir_only_properties( properties: Option<&HashMap>, operation: &str, @@ -3140,7 +3181,7 @@ impl LanceNamespace for DirectoryNamespace { let branch = Self::normalized_branch(request.branch.as_deref())?; let table_uri = self.resolve_table_location(&request.id).await?; let table_uri = match branch { - Some(b) => self.resolve_branch_location(&table_uri, b).await?, + Some(b) => self.resolve_branch_for_commit(&table_uri, b).await?, None => table_uri, }; @@ -12448,4 +12489,31 @@ mod tests { let result = namespace.alter_table_drop_columns(request).await; assert!(result.is_err(), "Should fail when table does not exist"); } + + #[tokio::test] + async fn test_create_branch_on_managed_dataset_succeeds() { + use lance::dataset::builder::DatasetBuilder; + + let temp = TempStdDir::default(); + let ns = create_managed_namespace(temp.to_str().unwrap()).await; + let table_id = vec!["t".to_string()]; + let mut main = create_managed_table(&ns, &table_id).await; + + let fork_version = main.version().version; + let branch = main + .create_branch("exp", fork_version, None) + .await + .expect("create_branch failed"); + assert_eq!(branch.manifest.branch.as_deref(), Some("exp")); + assert_eq!(scan_id_column(&branch).await, vec![1, 2]); + + let reopened = DatasetBuilder::from_namespace(ns.clone(), table_id.clone()) + .await + .unwrap() + .with_branch("exp", None) + .load() + .await + .expect("reopen branch failed"); + assert_eq!(scan_id_column(&reopened).await, vec![1, 2]); + } } From 06e5beaff50540ae5bf00bb65f5887abd7f66794 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 23 Jun 2026 09:51:46 -0700 Subject: [PATCH 2/3] fix(namespace): resolve branch commits from the object-store Path on Windows create_table_version now writes and zombie-checks a branch at the same object-store Path from find_branch(), instead of round-tripping the branch URI through object_store_path_from_uri. On Windows that round-trip resolves elsewhere, so the version listing came back empty, zombies looked like bootstraps, and test_branch_ops_reject_zombie_branch failed on windows-build. resolve_branch_for_commit returns (uri, path) so the commit write and the branch_has_committed_versions check share one basis. The shared listing logic is extracted into list_versions_under, which takes a Path directly. Co-authored-by: Cursor --- rust/lance-namespace-impls/src/dir.rs | 59 +++++++++++++++++++-------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 2c20cdadd54..e81cf96aab9 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1281,12 +1281,19 @@ impl DirectoryNamespace { .uri) } - /// Resolves the branch URI for a `create_table_version` commit. + /// Resolves a branch to its `(uri, object-store path)` for a + /// `create_table_version` commit, so the commit write and the zombie check + /// share one path. + /// /// `BranchContents` is the source of truth, so check the ref first: a /// registered branch commits directly. With no ref, accept the commit only on /// an empty chain (the `create_branch` bootstrap, whose first commit precedes /// its ref); reject a chain that already holds committed versions as a zombie. - async fn resolve_branch_for_commit(&self, table_uri: &str, branch: &str) -> Result { + async fn resolve_branch_for_commit( + &self, + table_uri: &str, + branch: &str, + ) -> Result<(String, Path)> { let main = self .configured_builder(table_uri) .load() @@ -1296,11 +1303,14 @@ impl DirectoryNamespace { message: format!("table at '{}' not found: {}", table_uri, e), }) })?; - let branch_uri = main.branch_location().find_branch(Some(branch))?.uri; + let branch_location = main.branch_location().find_branch(Some(branch))?; match main.branches().get(branch).await { - Ok(_) => Ok(branch_uri), + Ok(_) => Ok((branch_location.uri, branch_location.path)), Err(lance_core::Error::RefNotFound { .. }) => { - if self.branch_has_committed_versions(&branch_uri).await? { + if self + .branch_has_committed_versions(&branch_location.path) + .await? + { return Err(NamespaceError::TableNotFound { message: format!( "branch '{}' not found for table at '{}'", @@ -1309,17 +1319,17 @@ impl DirectoryNamespace { } .into()); } - Ok(branch_uri) + Ok((branch_location.uri, branch_location.path)) } Err(e) => Err(e), } } - async fn branch_has_committed_versions(&self, branch_uri: &str) -> Result { - let versions = self - .list_table_versions_from_storage(branch_uri, false, Some(1)) - .await?; - Ok(!versions.is_empty()) + async fn branch_has_committed_versions(&self, branch_path: &Path) -> Result { + Ok(!self + .list_versions_under(branch_path, false, Some(1)) + .await? + .is_empty()) } fn validate_dir_only_properties( @@ -1393,6 +1403,20 @@ impl DirectoryNamespace { limit: Option, ) -> Result> { let table_path = self.object_store_path_from_uri(table_uri)?; + self.list_versions_under(&table_path, descending, limit) + .await + } + + /// List committed manifest versions under `table_path`'s `_versions/` + /// directory. Takes the object-store `Path` directly because converting a URI + /// back to a path (via `object_store_path_from_uri`) can diverge from the real + /// storage location on Windows and miss the manifests. + async fn list_versions_under( + &self, + table_path: &Path, + descending: bool, + limit: Option, + ) -> Result> { let versions_dir = table_path.clone().join(VERSIONS_DIR); let manifest_metas: Vec<_> = self .object_store @@ -1402,8 +1426,8 @@ impl DirectoryNamespace { .map_err(|e| { lance_core::Error::from(NamespaceError::Internal { message: format!( - "Failed to list manifest files for table at '{}': {}", - table_uri, e + "Failed to list manifest files under '{}': {}", + versions_dir, e ), }) })?; @@ -3180,16 +3204,17 @@ impl LanceNamespace for DirectoryNamespace { self.record_op("create_table_version"); let branch = Self::normalized_branch(request.branch.as_deref())?; let table_uri = self.resolve_table_location(&request.id).await?; - let table_uri = match branch { + let (table_uri, table_path) = match branch { Some(b) => self.resolve_branch_for_commit(&table_uri, b).await?, - None => table_uri, + None => { + let table_path = self.object_store_path_from_uri(&table_uri)?; + (table_uri, table_path) + } }; let staging_manifest_path = &request.manifest_path; let version = request.version as u64; - let table_path = self.object_store_path_from_uri(&table_uri)?; - // Determine naming scheme from request, default to V2 let naming_scheme = match request.naming_scheme.as_deref() { Some("V1") => ManifestNamingScheme::V1, From 0da52eda3e1e5101904b7a7576f05621fe934c36 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 23 Jun 2026 11:31:22 -0700 Subject: [PATCH 3/3] test(namespace): stage zombie manifest at find_branch path on Windows On Windows, Path::from(format!(branch_location.path, ...)) diverges from find_branch().path.join(VERSIONS_DIR), so zombie detection missed the staged manifest and create_table_version accepted the commit. Stage with find_branch().path.join(VERSIONS_DIR). Co-authored-by: Cursor --- rust/lance-namespace-impls/src/dir.rs | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index e81cf96aab9..5305612b13a 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1281,9 +1281,7 @@ impl DirectoryNamespace { .uri) } - /// Resolves a branch to its `(uri, object-store path)` for a - /// `create_table_version` commit, so the commit write and the zombie check - /// share one path. + /// Resolves a branch to its `(uri, object-store path)` for `create_table_version`. /// /// `BranchContents` is the source of truth, so check the ref first: a /// registered branch commits directly. With no ref, accept the commit only on @@ -1407,10 +1405,9 @@ impl DirectoryNamespace { .await } - /// List committed manifest versions under `table_path`'s `_versions/` - /// directory. Takes the object-store `Path` directly because converting a URI - /// back to a path (via `object_store_path_from_uri`) can diverge from the real - /// storage location on Windows and miss the manifests. + /// List committed manifest versions under `table_path/_versions/`. + /// `table_path` must be an object-store `Path`; converting a URI to a path + /// can miss manifests on Windows. async fn list_versions_under( &self, table_path: &Path, @@ -5684,8 +5681,6 @@ mod tests { let (namespace, _temp_dir) = create_test_namespace().await; create_scalar_table(&namespace, "users").await; - // Stage a real (loadable) manifest under tree/ghost/_versions/ without - // create_branch, so the path exists but has no BranchContents ref. let dataset = open_dataset(&namespace, "users").await; let store = dataset.object_store(None).await.unwrap(); let manifest = store @@ -5710,15 +5705,15 @@ mod tests { .bytes() .await .unwrap(); - let zombie = Path::from(format!( - "{}/tree/ghost/_versions/{}", - dataset.branch_location().path, - manifest.location.filename().unwrap() - )); + let zombie = dataset + .branch_location() + .find_branch(Some("ghost")) + .unwrap() + .path + .join(VERSIONS_DIR) + .join(manifest.location.filename().unwrap()); store.inner.put(&zombie, bytes.into()).await.unwrap(); - // The directory is physically present, but the source of truth has no - // such branch -- this is what makes every op below reject it. assert!(dataset.branches().get("ghost").await.is_err()); fn rejected(label: &str, r: Result) {