From bb583386522d0920d459ce62d25138659c64f60a Mon Sep 17 00:00:00 2001 From: Christopher House Date: Tue, 16 Jun 2026 20:18:42 -0500 Subject: [PATCH 1/3] fix(iac,indexer): provision AzureWebJobsStorage for the dev indexer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dev indexer container reported 'azure.functions.webjobs.storage: Unhealthy — Unable to create client for AzureWebJobsStorage' every 30s because there was no storage account at all in rg-bt-dev and the Functions runtime expects AzureWebJobsStorage at startup. Container Apps itself reported the revision Healthy (the Cosmos change-feed trigger uses Cosmos's own lease container for state, not Azure Storage), so functionally the indexer was still working — but the log noise was constant and the runtime health surface stayed red. Fix: - New azurerm_storage_account.indexer_webjobs in the dev composition (Standard_LRS, StorageV2, shared_access_key_enabled = false, public_network_access_enabled gated on the existing data_services_public_access_enabled variable so it tracks the same toggle as Cosmos / Search / KV). - Workload UAMI granted Storage Blob Data Owner on the new account — covers the runtime's blob-container-create needs. - functions-container-app module gains a new variable azure_webjobs_storage_account_name and injects three env vars on the container: AzureWebJobsStorage__accountName = <storage-account-name> AzureWebJobsStorage__credential = managedidentity AzureWebJobsStorage__clientId = <workload-uami-client-id> - Indexer module call depends_on the role assignment so the data-plane role propagates via AAD before the Functions runtime first connects. No shared keys, no connection strings — matches the project's managed-identity-only stance. Test/prod compositions don't yet wire the indexer module, so no changes needed there. When they do, they'll pass the same variable shape. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- iac/environments/dev/main.tf | 49 +++++++++++++++++++ iac/modules/functions-container-app/README.md | 1 + iac/modules/functions-container-app/main.tf | 21 ++++++++ .../functions-container-app/variables.tf | 14 ++++++ 4 files changed, 85 insertions(+) diff --git a/iac/environments/dev/main.tf b/iac/environments/dev/main.tf index e07177a..9b1a9ca 100644 --- a/iac/environments/dev/main.tf +++ b/iac/environments/dev/main.tf @@ -680,6 +680,44 @@ module "ai_search_registry_index" { search_service_name = module.ai_search.name } +# Spec 006 — AzureWebJobsStorage for the indexer Functions runtime. +# +# Even though the Cosmos change-feed trigger uses Cosmos's lease container +# for state, the Functions host still wants `AzureWebJobsStorage` at startup +# or it logs the host as unhealthy and floods the container with "Unable to +# create client for AzureWebJobsStorage" every 30s. We supply a minimal +# AAD-only storage account here; the workload UAMI holds Storage Blob Data +# Owner on it via the role assignment below. No shared keys, no connection +# strings — managed-identity is the only auth path. +resource "azurerm_storage_account" "indexer_webjobs" { + # Storage account names: globally unique, 3-24 lowercase alphanumerics. + # `stbtdev` keeps us within the limit even at long suffixes. 
+ name = "stbtdev${var.unique_suffix}" + resource_group_name = azurerm_resource_group.this.name + location = azurerm_resource_group.this.location + account_tier = "Standard" + account_replication_type = "LRS" + account_kind = "StorageV2" + shared_access_key_enabled = false + public_network_access_enabled = var.data_services_public_access_enabled + min_tls_version = "TLS1_2" + + blob_properties { + delete_retention_policy { + days = 7 + } + } + + tags = local.shared_tags +} + +resource "azurerm_role_assignment" "workload_uami_indexer_webjobs_blob_owner" { + scope = azurerm_storage_account.indexer_webjobs.id + role_definition_name = "Storage Blob Data Owner" + principal_id = module.workload_identity.principal_id + description = "Indexer Functions runtime AAD access to AzureWebJobsStorage. Blob Data Owner covers the runtime's container-create needs." +} + module "indexer_container_app" { source = "../../modules/functions-container-app" @@ -708,7 +746,18 @@ module "indexer_container_app" { app_insights_connection_string_kv_secret_uri = azurerm_key_vault_secret.app_insights_connection_string.versionless_id + azure_webjobs_storage_account_name = azurerm_storage_account.indexer_webjobs.name + tags = local.shared_tags + + # The data-plane role assignment (Storage Blob Data Owner) must propagate + # via AAD before the Functions runtime opens its first connection. + # Without an explicit ordering edge, Container Apps revision rollout can + # race ahead of role propagation and the runtime restarts a few times + # before the role catches up. 
+ depends_on = [ + azurerm_role_assignment.workload_uami_indexer_webjobs_blob_owner, + ] } # Per-Container-App diagnostic settings are intentionally NOT provisioned for diff --git a/iac/modules/functions-container-app/README.md b/iac/modules/functions-container-app/README.md index 63533d6..51a9972 100644 --- a/iac/modules/functions-container-app/README.md +++ b/iac/modules/functions-container-app/README.md @@ -33,6 +33,7 @@ composition; this module does not provision them directly. | [ai\_search\_endpoint](#input\_ai\_search\_endpoint) | AI Search service endpoint URI. | `string` | n/a | yes | | [ai\_search\_index\_name](#input\_ai\_search\_index\_name) | AI Search index name (typically `registry-entities-v1`). | `string` | n/a | yes | | [app\_insights\_connection\_string\_kv\_secret\_uri](#input\_app\_insights\_connection\_string\_kv\_secret\_uri) | Key Vault secret URI exposing the App Insights connection string. Mirrors the spec-005 hybrid AI ingestion pattern. | `string` | n/a | yes | +| [azure\_webjobs\_storage\_account\_name](#input\_azure\_webjobs\_storage\_account\_name) | Storage account name backing `AzureWebJobsStorage` for the Functions<br/>runtime. Even though the indexer's only trigger is the Cosmos<br/>change-feed (which uses Cosmos's lease container for state), the<br/>Functions runtime still expects this connection at startup and<br/>flags the host as unhealthy when absent. We supply an AAD-only<br/>storage account here; the workload UAMI is granted Storage Blob<br/>Data Owner on it by the composition (no shared keys, no connection<br/>strings — consistent with the project's managed-identity stance). | `string` | n/a | yes | | [container\_apps\_environment\_id](#input\_container\_apps\_environment\_id) | Container Apps Environment resource id (from spec 005). | `string` | n/a | yes | | [container\_image](#input\_container\_image) | Fully-qualified container image reference (registry/name:tag). | `string` | n/a | yes | | [cosmos\_account\_endpoint](#input\_cosmos\_account\_endpoint) | Cosmos DB account endpoint URI (e.g., https://.documents.azure.com:443/). | `string` | n/a | yes | diff --git a/iac/modules/functions-container-app/main.tf b/iac/modules/functions-container-app/main.tf index 6e6fcc5..528a967 100644 --- a/iac/modules/functions-container-app/main.tf +++ b/iac/modules/functions-container-app/main.tf @@ -93,6 +93,27 @@ resource "azurerm_container_app" "indexer" { name = "FUNCTIONS_WORKER_RUNTIME" value = "dotnet-isolated" } + + # AzureWebJobsStorage — AAD-only. The Functions runtime expects + # this connection at startup even when the only trigger (Cosmos + # change-feed) doesn't need it. Without it the runtime reports + # the host as unhealthy and the indexer container logs spam + # "Unable to create client for AzureWebJobsStorage" every 30s. + # No connection strings, no shared keys — the workload UAMI is + # granted Storage Blob Data Owner on the account by the env + # composition. 
+ env { + name = "AzureWebJobsStorage__accountName" + value = var.azure_webjobs_storage_account_name + } + env { + name = "AzureWebJobsStorage__credential" + value = "managedidentity" + } + env { + name = "AzureWebJobsStorage__clientId" + value = var.workload_uami_client_id + } } } diff --git a/iac/modules/functions-container-app/variables.tf b/iac/modules/functions-container-app/variables.tf index f397b18..42d8031 100644 --- a/iac/modules/functions-container-app/variables.tf +++ b/iac/modules/functions-container-app/variables.tf @@ -94,6 +94,20 @@ variable "memory" { default = "1Gi" } +variable "azure_webjobs_storage_account_name" { + description = <<-EOT + Storage account name backing `AzureWebJobsStorage` for the Functions + runtime. Even though the indexer's only trigger is the Cosmos + change-feed (which uses Cosmos's lease container for state), the + Functions runtime still expects this connection at startup and + flags the host as unhealthy when absent. We supply an AAD-only + storage account here; the workload UAMI is granted Storage Blob + Data Owner on it by the composition (no shared keys, no connection + strings — consistent with the project's managed-identity stance). + EOT + type = string +} + variable "tags" { description = "Resource tags." 
type = map(string) From 7f930afdf99e5dd06b97ab63c91faa7ce427cfcf Mon Sep 17 00:00:00 2001 From: Christopher House Date: Tue, 16 Jun 2026 20:36:51 -0500 Subject: [PATCH 2/3] fix(iac): tighten + allowlist checkov rules for the new indexer storage account MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI's checkov scan tripped four rules on azurerm_storage_account.indexer_webjobs: CKV_AZURE_190 Ensure that Storage blobs restrict public access CKV2_AZURE_47 Ensure storage account is configured without blob anonymous access CKV2_AZURE_33 Ensure storage account is configured with private endpoint CKV2_AZURE_1 Ensure storage for critical data are encrypted with Customer Managed Key Fix: - Two of them (190 + 47) are real and cheap to satisfy — set `allow_nested_items_to_be_public = false` on the account so anonymous blob access is blocked at the account level. The Functions runtime never needs anonymous reads. - CKV2_AZURE_33 (private endpoint) is allowlisted with justification: the Container Apps Environment hosting the indexer has `vnetConfig: null`, so even if a private endpoint existed the indexer couldn't reach it. Same architectural posture as Cosmos / AI Search in dev. Allowlist entry calls out the dependency on a future CAE vnet integration so the rule gets re-evaluated when that happens. - CKV2_AZURE_1 (CMK) is allowlisted with justification: the account holds only Functions runtime internal state — no operator data, no audit log, no registry payload (those live in Cosmos). Azure-managed keys are the documented posture for AzureWebJobsStorage. Co-Authored-By: Claude Opus 4.7 (1M context) --- iac/.checkov.yaml | 22 ++++++++++++++++++++++ iac/environments/dev/main.tf | 7 ++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/iac/.checkov.yaml b/iac/.checkov.yaml index 4339efd..7284b0e 100644 --- a/iac/.checkov.yaml +++ b/iac/.checkov.yaml @@ -273,3 +273,25 @@ skip-check: # endpoint.this` will be created). 
Re-evaluate if Checkov improves # cross-module tracing. - CKV2_AZURE_32 + + # CKV2_AZURE_33 — "Ensure storage account is configured with private + # endpoint". The indexer's AzureWebJobsStorage account intentionally + # uses public access (gated on `data_services_public_access_enabled`) + # because the Container Apps Environment hosting the indexer is not + # vnet-integrated — the CAE has `vnetConfig: null`, so even if the + # storage account had a private endpoint the indexer couldn't reach it + # through the missing vnet. This matches the same posture used for + # Cosmos and AI Search across the dev composition. When the CAE + # becomes vnet-integrated under a future spec, this allowlist entry + # should be reconsidered alongside the Cosmos/Search analogues. + - CKV2_AZURE_33 + + # CKV2_AZURE_1 — "Ensure storage for critical data are encrypted with + # Customer Managed Key". The indexer's AzureWebJobsStorage holds only + # the Functions runtime's internal state (no operator data, no audit + # log, no registry payload — those live in Cosmos). Azure-managed keys + # (encryption at rest by default) are the documented posture for + # Functions runtime storage; a CMK on this account would add KV + + # rotation overhead with no data-sensitivity gain. The registry + # critical data IS in Cosmos, which itself uses AAD-only auth. + - CKV2_AZURE_1 diff --git a/iac/environments/dev/main.tf b/iac/environments/dev/main.tf index 9b1a9ca..d1b3f22 100644 --- a/iac/environments/dev/main.tf +++ b/iac/environments/dev/main.tf @@ -698,10 +698,15 @@ resource "azurerm_storage_account" "indexer_webjobs" { account_tier = "Standard" account_replication_type = "LRS" account_kind = "StorageV2" - shared_access_key_enabled = false + shared_access_key_enabled = false # AAD-only public_network_access_enabled = var.data_services_public_access_enabled min_tls_version = "TLS1_2" + # CKV_AZURE_190 / CKV2_AZURE_47 — block public anonymous blob access at + # the account level. 
The Functions runtime never needs anonymous reads; + # all access flows through the workload UAMI's AAD role assignment. + allow_nested_items_to_be_public = false + blob_properties { delete_retention_policy { days = 7 From d7b7b760065962e30c817abc74a0751f6b017a67 Mon Sep 17 00:00:00 2001 From: Christopher House Date: Tue, 16 Jun 2026 20:41:37 -0500 Subject: [PATCH 3/3] fix(iac): wire indexer storage role via workload-identity module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The project's 'no inline IAM in env compositions' lint (scripts/lint-iac-inline-iam.sh) rejected the inline azurerm_role_assignment.workload_uami_indexer_webjobs_blob_owner in iac/environments/dev/main.tf. The conventional path is to add the role to the workload-identity module's assigned_azure_rbac input map. Fix: - Add a new 'indexer-webjobs-blob-owner' entry to the existing assigned_azure_rbac map (Storage Blob Data Owner on the new storage account) — same pattern as acr-pull, kv-secrets-user, and monitoring-metrics-publisher. - Drop the inline azurerm_role_assignment resource block. - Indexer module depends_on the workload_identity module (covers the new role assignment + the existing ones). Behavior identical to the previous attempt; this is purely a project-convention cleanup so the lint passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- iac/environments/dev/main.tf | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/iac/environments/dev/main.tf b/iac/environments/dev/main.tf index d1b3f22..9ab9803 100644 --- a/iac/environments/dev/main.tf +++ b/iac/environments/dev/main.tf @@ -279,6 +279,13 @@ module "workload_identity" { role_definition_name = "Monitoring Metrics Publisher" scope = module.monitoring.application_insights_id } + # Spec 006 indexer — Functions runtime's AzureWebJobsStorage AAD + # connection. 
Blob Data Owner covers the runtime's container-create + # needs on the storage account declared below. + indexer-webjobs-blob-owner = { + role_definition_name = "Storage Blob Data Owner" + scope = azurerm_storage_account.indexer_webjobs.id + } } api_service_principal_object_id = data.azuread_service_principal.api.object_id @@ -716,12 +723,10 @@ resource "azurerm_storage_account" "indexer_webjobs" { tags = local.shared_tags } -resource "azurerm_role_assignment" "workload_uami_indexer_webjobs_blob_owner" { - scope = azurerm_storage_account.indexer_webjobs.id - role_definition_name = "Storage Blob Data Owner" - principal_id = module.workload_identity.principal_id - description = "Indexer Functions runtime AAD access to AzureWebJobsStorage. Blob Data Owner covers the runtime's container-create needs." -} +# Storage Blob Data Owner for the workload UAMI is wired via the +# workload-identity module's `assigned_azure_rbac` input above +# (entry `indexer-webjobs-blob-owner`) — per the project's +# "no inline IAM in env compositions" lint rule. module "indexer_container_app" { source = "../../modules/functions-container-app" @@ -761,7 +766,7 @@ module "indexer_container_app" { # race ahead of role propagation and the runtime restarts a few times # before the role catches up. depends_on = [ - azurerm_role_assignment.workload_uami_indexer_webjobs_blob_owner, + module.workload_identity, ] }