Skip to content

Commit 4cd5fda

Browse files
test(e2e): authenticate e2e via OAuth M2M so staging tests match DATABRICKS_USER
The e2e suite connected via a PAT (DATABRICKS_TOKEN). The Personal Staging Location tests PUT/GET/REMOVE against stage://tmp/<DATABRICKS_USER>/..., where DATABRICKS_USER is the PECO service principal (TEST_PECO_SP_ID). A personal stage is identity-scoped by design (there is even a test asserting you cannot touch another user's stage), so the connecting identity MUST equal DATABRICKS_USER. When DATABRICKS_TOKEN authenticates as a different identity, those tests fail with `PERMISSION_DENIED: <user> does not have access to Personal Stage`. Switch the e2e connection to OAuth M2M as the service principal via credentials_provider (conftest.auth_connect_kwargs), so the connecting identity IS the SP == DATABRICKS_USER. Falls back to the PAT when SP OAuth creds aren't set, so local PAT runs are unaffected. Wires DATABRICKS_CLIENT_ID / DATABRICKS_CLIENT_SECRET (TEST_PECO_SP_ID / TEST_PECO_SP_OAUTH_SECRET, already in azure-prod) into code-coverage.yml. Verified locally against the PECO workspace: all 9 staging_ingestion e2e tests pass via the real M2M path (including fails_to_modify_another_staging_user, which validates the identity scoping). Kernel e2e files are unchanged (they run in kernel-e2e.yml, ignored by code-coverage.yml).

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 14149d2 commit 4cd5fda

6 files changed

Lines changed: 79 additions & 9 deletions

File tree

.github/workflows/code-coverage.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ jobs:
4545
DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }}
4646
DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }}
4747
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
48+
# OAuth M2M as the PECO service principal. The e2e suite prefers this
49+
# over the PAT (see conftest.auth_connect_kwargs) so the connecting
50+
# identity IS the service principal -- required for the Personal Staging
51+
# Location tests, whose stage path is stage://tmp/<DATABRICKS_USER>/ and
52+
# DATABRICKS_USER is this same SP. A PAT for a different identity fails
53+
# those with PERMISSION_DENIED on the personal stage.
54+
DATABRICKS_CLIENT_ID: ${{ secrets.TEST_PECO_SP_ID }}
55+
DATABRICKS_CLIENT_SECRET: ${{ secrets.TEST_PECO_SP_OAUTH_SECRET }}
4856
DATABRICKS_CATALOG: peco
4957
DATABRICKS_USER: ${{ secrets.TEST_PECO_SP_ID }}
5058
steps:

conftest.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@ def access_token():
1717
return os.getenv("DATABRICKS_TOKEN")
1818

1919

20+
@pytest.fixture(scope="session")
def client_id():
    """Return the OAuth M2M service-principal client id (application id).

    The value is read from the ``DATABRICKS_CLIENT_ID`` environment
    variable; ``None`` is returned when the variable is not set.
    """
    return os.environ.get("DATABRICKS_CLIENT_ID")
24+
25+
26+
@pytest.fixture(scope="session")
def client_secret():
    """Return the value of ``DATABRICKS_CLIENT_SECRET`` from the environment.

    ``None`` is returned when the variable is not set.
    """
    return os.environ.get("DATABRICKS_CLIENT_SECRET")
29+
30+
2031
@pytest.fixture(scope="session")
2132
def ingestion_user():
2233
return os.getenv("DATABRICKS_USER")
@@ -33,12 +44,58 @@ def schema():
3344

3445

3546
@pytest.fixture(scope="session", autouse=True)
36-
def connection_details(host, http_path, access_token, ingestion_user, catalog, schema):
47+
def connection_details(
48+
host,
49+
http_path,
50+
access_token,
51+
client_id,
52+
client_secret,
53+
ingestion_user,
54+
catalog,
55+
schema,
56+
):
3757
return {
3858
"host": host,
3959
"http_path": http_path,
4060
"access_token": access_token,
61+
"client_id": client_id,
62+
"client_secret": client_secret,
4163
"ingestion_user": ingestion_user,
4264
"catalog": catalog,
4365
"schema": schema,
4466
}
67+
68+
69+
def auth_connect_kwargs(details):
    """Return the ``sql.connect`` auth kwargs derived from ``connection_details``.

    Prefers OAuth M2M (service principal) when both ``client_id`` and
    ``client_secret`` are present and non-empty, otherwise falls back to a PAT
    (``access_token``). M2M is required for identity-scoped operations such as
    the Personal Staging Location tests (stage://tmp/<DATABRICKS_USER>/...),
    where the connecting identity must equal DATABRICKS_USER -- the service
    principal. A PAT belonging to a different identity cannot access that
    stage.

    Args:
        details: Mapping of connection settings. Reads the keys ``client_id``,
            ``client_secret``, ``access_token`` and, on the M2M path only,
            ``host``.

    Returns:
        ``{"credentials_provider": <callable>}`` on the M2M path, otherwise
        ``{"access_token": <token or None>}``.

    Raises:
        KeyError: If M2M credentials are present but ``details`` has no
            ``"host"`` entry.
    """
    client_id = details.get("client_id")
    client_secret = details.get("client_secret")
    if not (client_id and client_secret):
        # PAT fallback keeps local, token-only runs working unchanged.
        return {"access_token": details.get("access_token")}

    host = details["host"]
    # Look for an actual URL scheme rather than a bare "http" prefix, so a
    # hostname that merely begins with "http" (e.g. "http-gw.example.com")
    # still gets "https://" prepended. The check is case-insensitive.
    has_scheme = host.lower().startswith(("http://", "https://"))
    host_url = host if has_scheme else f"https://{host}"

    def credential_provider():
        # Imported lazily so a PAT-only environment doesn't require the SDK.
        from databricks.sdk.core import Config, oauth_service_principal

        return oauth_service_principal(
            Config(
                host=host_url,
                client_id=client_id,
                client_secret=client_secret,
                # Explicit so an ambient DATABRICKS_TOKEN doesn't collide
                # ("more than one authorization method configured").
                auth_type="oauth-m2m",
            )
        )

    return {"credentials_provider": credential_provider}

tests/e2e/test_circuit_breaker.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import databricks.sql as sql
2323
from databricks.sql.telemetry.circuit_breaker_manager import CircuitBreakerManager
24+
from conftest import auth_connect_kwargs
2425

2526

2627
def wait_for_circuit_state(circuit_breaker, expected_states, timeout=5):
@@ -120,7 +121,7 @@ def mock_request(*args, **kwargs):
120121
with sql.connect(
121122
server_hostname=self.arguments["host"],
122123
http_path=self.arguments["http_path"],
123-
access_token=self.arguments.get("access_token"),
124+
**auth_connect_kwargs(self.arguments),
124125
force_enable_telemetry=True,
125126
telemetry_batch_size=1,
126127
_telemetry_circuit_breaker_enabled=True,
@@ -181,7 +182,7 @@ def mock_rate_limited_request(*args, **kwargs):
181182
with sql.connect(
182183
server_hostname=self.arguments["host"],
183184
http_path=self.arguments["http_path"],
184-
access_token=self.arguments.get("access_token"),
185+
**auth_connect_kwargs(self.arguments),
185186
force_enable_telemetry=True,
186187
telemetry_batch_size=1,
187188
_telemetry_circuit_breaker_enabled=False, # Disabled
@@ -215,7 +216,7 @@ def mock_conditional_request(*args, **kwargs):
215216
with sql.connect(
216217
server_hostname=self.arguments["host"],
217218
http_path=self.arguments["http_path"],
218-
access_token=self.arguments.get("access_token"),
219+
**auth_connect_kwargs(self.arguments),
219220
force_enable_telemetry=True,
220221
telemetry_batch_size=1,
221222
_telemetry_circuit_breaker_enabled=True,

tests/e2e/test_driver.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ def connection_params(self):
9999
return params
100100

101101
def auth_params(self):
102-
return {
103-
"access_token": self.arguments.get("access_token"),
104-
}
102+
from conftest import auth_connect_kwargs
103+
104+
return auth_connect_kwargs(self.arguments)
105105

106106
@contextmanager
107107
def connection(self, extra_params=()):

tests/e2e/test_telemetry_e2e.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ def get_details(self, connection_details):
2424
self.arguments = connection_details.copy()
2525

2626
def connection_params(self):
27+
from conftest import auth_connect_kwargs
28+
2729
return {
2830
"server_hostname": self.arguments["host"],
2931
"http_path": self.arguments["http_path"],
30-
"access_token": self.arguments.get("access_token"),
32+
**auth_connect_kwargs(self.arguments),
3133
}
3234

3335
@contextmanager

tests/e2e/test_transactions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ def _unique_table_name_raw(suffix):
5656
@pytest.fixture
5757
def mst_conn_params(connection_details):
5858
"""Connection parameters with MST enabled."""
59+
from conftest import auth_connect_kwargs
60+
5961
return {
6062
"server_hostname": connection_details["host"],
6163
"http_path": connection_details["http_path"],
62-
"access_token": connection_details.get("access_token"),
64+
**auth_connect_kwargs(connection_details),
6365
"ignore_transactions": False,
6466
}
6567

0 commit comments

Comments
 (0)