From 9e24e6bab1d3502db298ff7c16310970a026df62 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 29 May 2026 04:19:58 +0530 Subject: [PATCH] Extract SPOG org-id from cluster httpPath for non-Thrift requests For all-purpose-compute Thrift connections on SPOG (custom-URL) hosts the httpPath is /sql/protocolv1/o/<workspace-id>/<cluster-id> and the workspace ID is encoded in the path itself. PoPP routes the Thrift request correctly off the /o/<workspace-id>/ segment, so the connection succeeds without an explicit ?o= query parameter. Other requests on the same connection (telemetry POSTs to /telemetry-ext, feature-flag fetches, etc.) hit different paths that don't carry the workspace ID. Previously parseCustomHeaders only looked at ?o= in the httpPath, so the x-databricks-org-id header was never set for cluster URLs without ?o=. On SPOG hosts PoPP then had no workspace context for these requests and redirected them to /login (HTTP 303), silently dropping telemetry. Extend parseCustomHeaders to also extract the workspace ID from the cluster path segment as a fallback when ?o= is absent. Priority order is preserved: explicit http.header.x-databricks-org-id > ?o= query param > /o/<workspace-id>/ path segment. Verified end-to-end against peco.azuredatabricks.net (Prod SPOG) cluster 1214-195625-gtrwbe64 via OSS JDBC PAT: telemetry POST /telemetry-ext now returns HTTP 200 instead of HTTP 303 redirect to /login. Signed-off-by: Madhavendra Rathore --- NEXT_CHANGELOG.md | 1 + .../api/impl/DatabricksConnectionContext.java | 23 +++++- .../impl/DatabricksConnectionContextTest.java | 74 +++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index bef82dd23..4a5288e96 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,6 +8,7 @@ ### Fixed - Fixed `setCatalog()` and `setSchema()` producing invalid SQL (e.g. `SET CATALOG ``name``) when the catalog or schema name was passed already wrapped in backticks. 
Backticks are now stripped before wrapping, and `getCatalog()`/`getSchema()` return the bare identifier name. +- Fixed silent telemetry loss on SPOG (custom-URL) hosts when connecting to an all-purpose cluster via a Thrift `httpPath` like `sql/protocolv1/o/<workspace-id>/<cluster-id>`. The driver now extracts the workspace ID from the cluster path segment (in addition to the existing `?o=` query-param extraction) and sets it as the `x-databricks-org-id` header on every outgoing request. Without this, telemetry POSTs to `/telemetry-ext` were redirected to `/login` because the workspace context could not be inferred from the URL. --- *Note: When making changes, please add your change under the appropriate section diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java index 956695ab5..69d7d3a4e 100644 --- a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java +++ b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java @@ -1322,6 +1322,11 @@ private String getParameter(DatabricksJdbcUrlParams key, String defaultValue) { private static final String ORG_ID_HEADER = "x-databricks-org-id"; private static final String ORG_ID_QUERY_PARAM = "o"; + // Matches the workspace ID inside an all-purpose-compute Thrift path of the form + // [/]sql/protocolv1/o/<workspace-id>/<cluster-id>[/...][?...]. + private static final java.util.regex.Pattern CLUSTER_PATH_ORG_ID_PATTERN = + java.util.regex.Pattern.compile("(?:^|/)sql/protocolv1/o/(\\d+)/[^/?]+"); + private Map parseCustomHeaders(ImmutableMap parameters) { String filterPrefix = DatabricksJdbcUrlParams.HTTP_HEADERS.getParamName(); @@ -1334,7 +1339,11 @@ private Map parseCustomHeaders(ImmutableMap para entry -> entry.getKey().substring(filterPrefix.length()), Map.Entry::getValue))); - // Extract org ID from ?o= in httpPath for SPOG routing + // Extract org ID for SPOG routing. Two sources, in priority order: + // 1. 
?o= query parameter in httpPath (warehouse paths typically use this) + // 2. /sql/protocolv1/o/<workspace-id>/<cluster-id> path segment (all-purpose cluster paths) + // Without this, non-Thrift endpoints (e.g. /telemetry-ext) lack workspace context on + // SPOG hosts and PoPP redirects them to /login. if (!headers.containsKey(ORG_ID_HEADER)) { String httpPath = parameters.getOrDefault( @@ -1358,6 +1367,18 @@ private Map parseCustomHeaders(ImmutableMap para "SPOG header extraction: malformed httpPath, skipping org-id extraction: " + e.getMessage()); } + + if (!headers.containsKey(ORG_ID_HEADER) && !httpPath.isEmpty()) { + Matcher clusterMatch = CLUSTER_PATH_ORG_ID_PATTERN.matcher(httpPath); + if (clusterMatch.find()) { + String orgId = clusterMatch.group(1); + headers.put(ORG_ID_HEADER, orgId); + LOGGER.debug( + "SPOG header extraction: injecting {}={} (extracted from cluster path segment)", + ORG_ID_HEADER, + orgId); + } + } } else { LOGGER.debug( "SPOG header extraction: {} already set by caller, not extracting from httpPath", diff --git a/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java b/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java index f207c1537..ec30f29d6 100644 --- a/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java +++ b/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java @@ -1458,6 +1458,80 @@ void testSpogContext_explicitHeaderTakesPrecedence() throws DatabricksSQLExcepti assertEquals("fromheader", headers.get("x-databricks-org-id")); } + @Test + void testSpogContext_extractsOrgIdFromClusterPathSegment() throws DatabricksSQLException { + // All-purpose-compute Thrift path embeds the workspace ID in /o/<workspace-id>/. + // No ?o= query param — driver should still extract org-id so non-Thrift endpoints + // (e.g. /telemetry-ext) get the x-databricks-org-id header on SPOG hosts. 
+ String url = + "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;" + + "httpPath=sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt"; + Properties props = new Properties(); + props.put("user", "token"); + props.put("password", "test-token"); + IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props); + + Map headers = ctx.getCustomHeaders(); + assertEquals("6051921418418893", headers.get("x-databricks-org-id")); + } + + @Test + void testSpogContext_clusterPathWithLeadingSlashAlsoExtracts() throws DatabricksSQLException { + String url = + "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;" + + "httpPath=/sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt"; + Properties props = new Properties(); + props.put("user", "token"); + props.put("password", "test-token"); + IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props); + + assertEquals("6051921418418893", ctx.getCustomHeaders().get("x-databricks-org-id")); + } + + @Test + void testSpogContext_queryParamWinsOverClusterPathSegment() throws DatabricksSQLException { + // ?o= takes precedence when both forms are present (matches priority order in the code). 
+ String url = + "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;" + + "httpPath=sql/protocolv1/o/111/0528-220959-uzmcn1qt?o=222"; + Properties props = new Properties(); + props.put("user", "token"); + props.put("password", "test-token"); + IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props); + + assertEquals("222", ctx.getCustomHeaders().get("x-databricks-org-id")); + } + + @Test + void testSpogContext_explicitHeaderTakesPrecedenceOverClusterPath() + throws DatabricksSQLException { + String url = + "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;" + + "httpPath=sql/protocolv1/o/111/0528-220959-uzmcn1qt;" + + "http.header.x-databricks-org-id=fromheader"; + Properties props = new Properties(); + props.put("user", "token"); + props.put("password", "test-token"); + IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props); + + assertEquals("fromheader", ctx.getCustomHeaders().get("x-databricks-org-id")); + } + + @Test + void testSpogContext_warehousePathWithoutQueryParamHasNoOrgId() throws DatabricksSQLException { + // Warehouse paths never embed the workspace ID, so without ?o= no header is set. + // Regression guard for the cluster-path fallback (it must not match warehouse paths). + String url = + "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;" + + "httpPath=/sql/1.0/warehouses/abc123"; + Properties props = new Properties(); + props.put("user", "token"); + props.put("password", "test-token"); + IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props); + + assertFalse(ctx.getCustomHeaders().containsKey("x-databricks-org-id")); + } + @Test public void testDefaultGetterCoverage() throws DatabricksSQLException { IDatabricksConnectionContext ctx =