From 9e24e6bab1d3502db298ff7c16310970a026df62 Mon Sep 17 00:00:00 2001
From: Madhavendra Rathore <madhavendra.rathore@databricks.com>
Date: Fri, 29 May 2026 04:19:58 +0530
Subject: [PATCH] Extract SPOG org-id from cluster httpPath for non-Thrift
 requests

For all-purpose-compute Thrift connections on SPOG (custom-URL) hosts
the httpPath is /sql/protocolv1/o/<workspace-id>/<cluster-id> and the
workspace ID is encoded in the path itself. PoPP routes the Thrift
request correctly off the /o/<wsid>/ segment, so the connection succeeds
without an explicit ?o= query parameter.

Other requests on the same connection (telemetry POSTs to /telemetry-ext,
feature-flag fetches, etc.) hit different paths that don't carry the
workspace ID. Previously parseCustomHeaders only looked at ?o= in the
httpPath, so unless the caller set http.header.x-databricks-org-id
explicitly, the header was missing for cluster URLs without ?o=. On
SPOG hosts PoPP then had no workspace context for these requests and
redirected them to /login (HTTP 303), silently dropping telemetry.

Extend parseCustomHeaders to also extract the workspace ID from the
cluster path segment as a fallback when ?o= is absent. Priority order
is preserved: explicit http.header.x-databricks-org-id > ?o= query
param > /o/<wsid>/ path segment.

Verified end-to-end against peco.azuredatabricks.net (Prod SPOG), cluster
1214-195625-gtrwbe64, using the OSS JDBC driver with PAT authentication:
telemetry POST /telemetry-ext now returns HTTP 200 instead of an HTTP 303
redirect to /login.

Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
---
 NEXT_CHANGELOG.md                             |  1 +
 .../api/impl/DatabricksConnectionContext.java | 23 +++++-
 .../impl/DatabricksConnectionContextTest.java | 74 +++++++++++++++++++
 3 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
index bef82dd23..4a5288e96 100644
--- a/NEXT_CHANGELOG.md
+++ b/NEXT_CHANGELOG.md
@@ -8,6 +8,7 @@
 
 ### Fixed
 - Fixed `setCatalog()` and `setSchema()` producing invalid SQL (e.g. `SET CATALOG ``name``) when the catalog or schema name was passed already wrapped in backticks. Backticks are now stripped before wrapping, and `getCatalog()`/`getSchema()` return the bare identifier name.
+- Fixed silent telemetry loss on SPOG (custom-URL) hosts when connecting to an all-purpose cluster via a Thrift `httpPath` like `sql/protocolv1/o/<workspace-id>/<cluster-id>`. The driver now extracts the workspace ID from the cluster path segment (in addition to the existing `?o=<workspace-id>` query-param extraction) and sets it as the `x-databricks-org-id` header on every outgoing request. Without this, telemetry POSTs to `/telemetry-ext` were redirected to `/login` because the workspace context could not be inferred from the URL.
 
 ---
 *Note: When making changes, please add your change under the appropriate section
diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java
index 956695ab5..69d7d3a4e 100644
--- a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java
+++ b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java
@@ -1322,6 +1322,11 @@ private String getParameter(DatabricksJdbcUrlParams key, String defaultValue) {
   private static final String ORG_ID_HEADER = "x-databricks-org-id";
   private static final String ORG_ID_QUERY_PARAM = "o";
 
+  // Captures the numeric workspace ID (group 1) from an all-purpose-compute Thrift path of the
+  // form [/]sql/protocolv1/o/<workspace-id>/<cluster-id>[/...][?...].
+  private static final java.util.regex.Pattern CLUSTER_PATH_ORG_ID_PATTERN =
+      java.util.regex.Pattern.compile("(?:^|/)sql/protocolv1/o/(\\d+)/[^/?]+");
+
   private Map<String, String> parseCustomHeaders(ImmutableMap<String, String> parameters) {
     String filterPrefix = DatabricksJdbcUrlParams.HTTP_HEADERS.getParamName();
 
@@ -1334,7 +1339,11 @@ private Map<String, String> parseCustomHeaders(ImmutableMap<String, String> para
                         entry -> entry.getKey().substring(filterPrefix.length()),
                         Map.Entry::getValue)));
 
-    // Extract org ID from ?o= in httpPath for SPOG routing
+    // Extract org ID for SPOG routing. Two sources, in priority order:
+    //   1. ?o=<wsid> query parameter in httpPath (warehouse paths typically use this)
+    //   2. /sql/protocolv1/o/<wsid>/<cluster> path segment (all-purpose cluster paths)
+    // Without this, non-Thrift endpoints (e.g. /telemetry-ext) lack workspace context on
+    // SPOG hosts and PoPP redirects them to /login.
     if (!headers.containsKey(ORG_ID_HEADER)) {
       String httpPath =
           parameters.getOrDefault(
@@ -1358,6 +1367,18 @@ private Map<String, String> parseCustomHeaders(ImmutableMap<String, String> para
             "SPOG header extraction: malformed httpPath, skipping org-id extraction: "
                 + e.getMessage());
       }
+
+      if (!headers.containsKey(ORG_ID_HEADER) && !httpPath.isEmpty()) {
+        Matcher clusterMatch = CLUSTER_PATH_ORG_ID_PATTERN.matcher(httpPath);
+        if (clusterMatch.find()) {
+          String orgId = clusterMatch.group(1);
+          headers.put(ORG_ID_HEADER, orgId);
+          LOGGER.debug(
+              "SPOG header extraction: injecting {}={} (extracted from cluster path segment)",
+              ORG_ID_HEADER,
+              orgId);
+        }
+      }
     } else {
       LOGGER.debug(
           "SPOG header extraction: {} already set by caller, not extracting from httpPath",
diff --git a/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java b/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java
index f207c1537..ec30f29d6 100644
--- a/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java
+++ b/src/test/java/com/databricks/jdbc/api/impl/DatabricksConnectionContextTest.java
@@ -1458,6 +1458,80 @@ void testSpogContext_explicitHeaderTakesPrecedence() throws DatabricksSQLExcepti
     assertEquals("fromheader", headers.get("x-databricks-org-id"));
   }
 
+  @Test
+  void testSpogContext_extractsOrgIdFromClusterPathSegment() throws DatabricksSQLException {
+    // All-purpose-compute Thrift path embeds the workspace ID in /o/<wsid>/<cluster>.
+    // No ?o= query param — driver should still extract org-id so non-Thrift endpoints
+    // (e.g. /telemetry-ext) get the x-databricks-org-id header on SPOG hosts.
+    String url =
+        "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;"
+            + "httpPath=sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt";
+    Properties props = new Properties();
+    props.put("user", "token");
+    props.put("password", "test-token");
+    IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props);
+
+    Map<String, String> headers = ctx.getCustomHeaders();
+    assertEquals("6051921418418893", headers.get("x-databricks-org-id"));
+  }
+
+  @Test
+  void testSpogContext_clusterPathWithLeadingSlashAlsoExtracts() throws DatabricksSQLException {
+    String url =
+        "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;"
+            + "httpPath=/sql/protocolv1/o/6051921418418893/0528-220959-uzmcn1qt";
+    Properties props = new Properties();
+    props.put("user", "token");
+    props.put("password", "test-token");
+    IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props);
+
+    assertEquals("6051921418418893", ctx.getCustomHeaders().get("x-databricks-org-id"));
+  }
+
+  @Test
+  void testSpogContext_queryParamWinsOverClusterPathSegment() throws DatabricksSQLException {
+    // ?o= takes precedence when both forms are present (matches priority order in the code).
+    String url =
+        "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;"
+            + "httpPath=sql/protocolv1/o/111/0528-220959-uzmcn1qt?o=222";
+    Properties props = new Properties();
+    props.put("user", "token");
+    props.put("password", "test-token");
+    IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props);
+
+    assertEquals("222", ctx.getCustomHeaders().get("x-databricks-org-id"));
+  }
+
+  @Test
+  void testSpogContext_explicitHeaderTakesPrecedenceOverClusterPath()
+      throws DatabricksSQLException {
+    String url =
+        "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;"
+            + "httpPath=sql/protocolv1/o/111/0528-220959-uzmcn1qt;"
+            + "http.header.x-databricks-org-id=fromheader";
+    Properties props = new Properties();
+    props.put("user", "token");
+    props.put("password", "test-token");
+    IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props);
+
+    assertEquals("fromheader", ctx.getCustomHeaders().get("x-databricks-org-id"));
+  }
+
+  @Test
+  void testSpogContext_warehousePathWithoutQueryParamHasNoOrgId() throws DatabricksSQLException {
+    // Warehouse paths never embed the workspace ID, so without ?o= no header is set.
+    // Regression guard for the cluster-path fallback (it must not match warehouse paths).
+    String url =
+        "jdbc:databricks://spog.cloud.databricks.com/default;ssl=1;AuthMech=3;"
+            + "httpPath=/sql/1.0/warehouses/abc123";
+    Properties props = new Properties();
+    props.put("user", "token");
+    props.put("password", "test-token");
+    IDatabricksConnectionContext ctx = DatabricksConnectionContext.parse(url, props);
+
+    assertFalse(ctx.getCustomHeaders().containsKey("x-databricks-org-id"));
+  }
+
   @Test
   public void testDefaultGetterCoverage() throws DatabricksSQLException {
     IDatabricksConnectionContext ctx =