From 18967ea43806afb9c910ac0bdcda954fadef25b1 Mon Sep 17 00:00:00 2001 From: Christopher House Date: Tue, 16 Jun 2026 21:05:08 -0500 Subject: [PATCH] fix(iac): wire pipeline MI for AAD storage data-plane on indexer_webjobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CD-dev was failing on 'tofu apply — infra-only' with: KeyBasedAuthenticationNotPermitted: Key based authentication is not permitted on this storage account. Root cause chain: 1. Indexer storage account has shared_access_key_enabled = false 2. azurerm provider's post-create blob data-plane wait uses key auth by default → 403s 3. Setting storage_use_azuread = true in the provider config switches that wait to AAD 4. But the pipeline MI has no Storage Blob Data role on the new account, so AAD also 403s 5. To grant the role, Storage Blob Data Owner must be in the bootstrap pipeline-RBAC-Admin condition allowlist This change ships the four pieces (A-D) needed to break that chain: A. iac/platform-bootstrap/main.tf — adds Storage Blob Data Owner role GUID (b7e6dc6d-f1e8-4753-8033-0f276bb0955b) to the pipeline_role_admin condition allowlist. After merge an operator must run 'tofu apply' against the bootstrap module before CD-dev can succeed (bootstrap state is operator-held per the module's README, not CI-applied). B. iac/environments/dev/providers.tf — adds storage_use_azuread = true to the azurerm provider so data-plane wait + future ops use AAD across the dev composition. C. iac/environments/dev/main.tf — adds inline pipeline self-grant for Storage Blob Data Owner at RG scope, with a 60s time_sleep for AAD propagation matching the existing wait_for_kv_rbac_propagation pattern. Storage account depends_on the sleep so creation only proceeds after the role has propagated. D. scripts/lint-iac-inline-iam.sh — allowlists the new inline IAM resource (pipeline self-grant scope is the env RG, not foldable into the workload-identity module). 
The workload UAMI's grant for runtime AAD usage continues to flow through workload-identity module's assigned_azure_rbac map. Co-Authored-By: Claude Opus 4.7 (1M context) --- iac/environments/dev/main.tf | 36 +++++++++++++++++++++++++++++++ iac/environments/dev/providers.tf | 10 +++++++++ iac/platform-bootstrap/main.tf | 15 ++++++++++++- scripts/lint-iac-inline-iam.sh | 8 +++++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/iac/environments/dev/main.tf b/iac/environments/dev/main.tf index 9ab9803..6665507 100644 --- a/iac/environments/dev/main.tf +++ b/iac/environments/dev/main.tf @@ -696,6 +696,35 @@ module "ai_search_registry_index" { # AAD-only storage account here; the workload UAMI holds Storage Blob Data # Owner on it via the role assignment below. No shared keys, no connection # strings — managed-identity is the only auth path. + +# Pipeline MI self-grant — required because the indexer storage account has +# `shared_access_key_enabled = false`, so the azurerm provider's post-create +# blob data-plane wait must use AAD (via `storage_use_azuread = true` on the +# provider). Without this grant, the AAD call from the provider 403s and +# `tofu apply` fails on the storage account resource. RG-scoped so the role +# assignment can be created BEFORE the storage account (the storage account +# resource block depends_on this grant + a propagation sleep below). Mirrors +# the `pipeline_kv_secrets_officer` pattern: per-env, RG-scoped, allowlisted +# in `scripts/lint-iac-inline-iam.sh` because the workload-identity module +# is parented on the workload UAMI and this is a pipeline grant. +resource "azurerm_role_assignment" "pipeline_storage_blob_data_owner" { + scope = azurerm_resource_group.this.id + role_definition_name = "Storage Blob Data Owner" + principal_id = data.azurerm_client_config.current.object_id + description = "Pipeline MI manages `azurerm_storage_account.indexer_webjobs` data-plane wait via AAD (shared keys disabled on the account)." 
+} + +# Azure AD role assignments have eventual-consistency propagation. Without +# this sleep, the first `azurerm_storage_account` post-create data-plane +# wait races propagation of the pipeline grant and 403s — same shape as the +# KV `wait_for_kv_rbac_propagation` block above. +resource "time_sleep" "wait_for_storage_rbac_propagation" { + depends_on = [ + azurerm_role_assignment.pipeline_storage_blob_data_owner, + ] + create_duration = "60s" +} + resource "azurerm_storage_account" "indexer_webjobs" { # Storage account names: globally unique, 3-24 lowercase alphanumerics. # `stbtdev` keeps us within the limit even at long suffixes. @@ -721,6 +750,13 @@ resource "azurerm_storage_account" "indexer_webjobs" { } tags = local.shared_tags + + # Ordering edge for the provider's post-create blob data-plane wait — + # the pipeline MI's Storage Blob Data Owner role must exist + propagate + # before the storage account is created. + depends_on = [ + time_sleep.wait_for_storage_rbac_propagation, + ] } # Storage Blob Data Owner for the workload UAMI is wired via the diff --git a/iac/environments/dev/providers.tf b/iac/environments/dev/providers.tf index 043352c..441afa9 100644 --- a/iac/environments/dev/providers.tf +++ b/iac/environments/dev/providers.tf @@ -47,6 +47,16 @@ terraform { provider "azurerm" { subscription_id = var.subscription_id + # Spec 006 indexer storage — keys are disabled on the new + # AzureWebJobsStorage account (`shared_access_key_enabled = false`). + # Without this flag, the provider's post-create data-plane wait for + # the Blob service uses key-based auth and 403s on + # `KeyBasedAuthenticationNotPermitted`, failing the apply. The flag + # tells azurerm to use AAD for data-plane operations on all storage + # accounts in this composition; key-based ops continue to work where + # they're still enabled. 
+ storage_use_azuread = true + features { resource_group { prevent_deletion_if_contains_resources = true diff --git a/iac/platform-bootstrap/main.tf b/iac/platform-bootstrap/main.tf index 88f5a43..8b78f24 100644 --- a/iac/platform-bootstrap/main.tf +++ b/iac/platform-bootstrap/main.tf @@ -231,6 +231,18 @@ resource "azurerm_role_assignment" "pipeline_role_admin" { # grants; this allowlist permits namespace-scope and resource-group- # scope grants only via the existing scope-evaluation gates. # + # Spec 006 indexer storage — AzureWebJobsStorage AAD access: + # b7e6dc6d-f1e8-4753-8033-0f276bb0955b Storage Blob Data Owner + # → granted to the workload identity on the indexer's + # AzureWebJobsStorage account so the Functions runtime can + # authenticate to its internal blob containers via AAD (the + # account has `shared_access_key_enabled = false`). The pipeline + # MI also needs this role over the same account so the azurerm + # provider's post-create blob data-plane wait succeeds with + # `storage_use_azuread = true`; that self-grant is wired inline, + # at resource-group scope, in `iac/environments/dev/main.tf` (allowlist entry in + # `scripts/lint-iac-inline-iam.sh`). + # # NOT in this list: Cosmos DB Built-in Data Contributor — Cosmos uses its # OWN native RBAC surface (`azurerm_cosmosdb_sql_role_assignment`), not # Azure RBAC (`azurerm_role_assignment`), so this condition does not govern @@ -249,7 +261,8 @@ resource "azurerm_role_assignment" "pipeline_role_admin" { 4f6d3b9b-027b-4f4c-9142-0e5a2a2247e0, 8ebe5a00-799e-43f5-93ac-243d3dce84a7, 3913510d-42f4-4e42-8a64-420c390055eb, - acdd72a7-3385-48ef-bd42-f606fba81ae7 + acdd72a7-3385-48ef-bd42-f606fba81ae7, + b7e6dc6d-f1e8-4753-8033-0f276bb0955b } ) CONDITION diff --git a/scripts/lint-iac-inline-iam.sh b/scripts/lint-iac-inline-iam.sh index f98cebc..48e6030 100755 --- a/scripts/lint-iac-inline-iam.sh +++ b/scripts/lint-iac-inline-iam.sh @@ -41,6 +41,14 @@ ALLOWLIST=( # Standing operator access for `kv_operator_object_ids`.
Per-env operator # set — not a workload grant, not a fit for the workload-identity module. "azurerm_role_assignment.operator_kv_secrets_officer" + + # Spec 006 indexer storage — pipeline MI's data-plane grant, scoped to the + # env RG that holds the indexer's AzureWebJobsStorage account. Required because the account + # has `shared_access_key_enabled = false`, so the azurerm provider's + # post-create blob data-plane wait must use AAD. Same shape as the KV + # pipeline grant above; the workload-identity module is parented on + # the workload UAMI and this is a PIPELINE grant — can't be folded in. + "azurerm_role_assignment.pipeline_storage_blob_data_owner" ) # `iac/environments//main.tf` only — submodules + helper files are