diff --git a/iac/.checkov.yaml b/iac/.checkov.yaml index 4339efd..7284b0e 100644 --- a/iac/.checkov.yaml +++ b/iac/.checkov.yaml @@ -273,3 +273,25 @@ skip-check: # endpoint.this` will be created). Re-evaluate if Checkov improves # cross-module tracing. - CKV2_AZURE_32 + + # CKV2_AZURE_33 — "Ensure storage account is configured with private + # endpoint". The indexer's AzureWebJobsStorage account intentionally + # uses public access (gated on `data_services_public_access_enabled`) + # because the Container Apps Environment hosting the indexer is not + # vnet-integrated — the CAE has `vnetConfig: null`, so even if the + # storage account had a private endpoint the indexer couldn't reach it + # through the missing vnet. This matches the same posture used for + # Cosmos and AI Search across the dev composition. When the CAE + # becomes vnet-integrated under a future spec, this allowlist entry + # should be reconsidered alongside the Cosmos/Search analogues. + - CKV2_AZURE_33 + + # CKV2_AZURE_1 — "Ensure storage for critical data are encrypted with + # Customer Managed Key". The indexer's AzureWebJobsStorage holds only + # the Functions runtime's internal state (no operator data, no audit + # log, no registry payload — those live in Cosmos). Azure-managed keys + # (encryption at rest by default) are the documented posture for + # Functions runtime storage; a CMK on this account would add KV + + # rotation overhead with no data-sensitivity gain. The registry + # critical data IS in Cosmos, which itself uses AAD-only auth. + - CKV2_AZURE_1 diff --git a/iac/environments/dev/main.tf b/iac/environments/dev/main.tf index e07177a..9ab9803 100644 --- a/iac/environments/dev/main.tf +++ b/iac/environments/dev/main.tf @@ -279,6 +279,13 @@ module "workload_identity" { role_definition_name = "Monitoring Metrics Publisher" scope = module.monitoring.application_insights_id } + # Spec 006 indexer — Functions runtime's AzureWebJobsStorage AAD + # connection. 
Blob Data Owner covers the runtime's container-create + # needs on the storage account declared below. + indexer-webjobs-blob-owner = { + role_definition_name = "Storage Blob Data Owner" + scope = azurerm_storage_account.indexer_webjobs.id + } } api_service_principal_object_id = data.azuread_service_principal.api.object_id @@ -680,6 +687,47 @@ module "ai_search_registry_index" { search_service_name = module.ai_search.name } +# Spec 006 — AzureWebJobsStorage for the indexer Functions runtime. +# +# Even though the Cosmos change-feed trigger uses Cosmos's lease container +# for state, the Functions host still wants `AzureWebJobsStorage` at startup +# or it logs the host as unhealthy and floods the container with "Unable to +# create client for AzureWebJobsStorage" every 30s. We supply a minimal +# AAD-only storage account here; the workload UAMI holds Storage Blob Data +# Owner on it via the `indexer-webjobs-blob-owner` RBAC entry above. No +# shared keys, no connection strings — managed-identity is the only auth path. +resource "azurerm_storage_account" "indexer_webjobs" { + # Storage account names: globally unique, 3-24 lowercase alphanumerics. + # `stbtdev` is 7 chars, so `var.unique_suffix` must stay <= 17 lowercase alphanumerics. + name = "stbtdev${var.unique_suffix}" + resource_group_name = azurerm_resource_group.this.name + location = azurerm_resource_group.this.location + account_tier = "Standard" + account_replication_type = "LRS" + account_kind = "StorageV2" + shared_access_key_enabled = false # AAD-only + public_network_access_enabled = var.data_services_public_access_enabled + min_tls_version = "TLS1_2" + + # CKV_AZURE_190 / CKV2_AZURE_47 — block public anonymous blob access at + # the account level. The Functions runtime never needs anonymous reads; + # all access flows through the workload UAMI's AAD role assignment. 
+ allow_nested_items_to_be_public = false + + blob_properties { + delete_retention_policy { + days = 7 + } + } + + tags = local.shared_tags +} + +# Storage Blob Data Owner for the workload UAMI is wired via the +# workload-identity module's `assigned_azure_rbac` input above +# (entry `indexer-webjobs-blob-owner`) — per the project's +# "no inline IAM in env compositions" lint rule. + module "indexer_container_app" { source = "../../modules/functions-container-app" @@ -708,7 +756,18 @@ module "indexer_container_app" { app_insights_connection_string_kv_secret_uri = azurerm_key_vault_secret.app_insights_connection_string.versionless_id + azure_webjobs_storage_account_name = azurerm_storage_account.indexer_webjobs.name + tags = local.shared_tags + + # The data-plane role assignment (Storage Blob Data Owner) must exist and + # propagate via AAD before the Functions runtime opens its first connection. + # `depends_on` only orders creation (role assignment first, then the app); + # it does not wait for AAD propagation, so a few runtime restarts can still + # occur on first apply until the role catches up. + depends_on = [ + module.workload_identity, + ] } # Per-Container-App diagnostic settings are intentionally NOT provisioned for diff --git a/iac/modules/functions-container-app/README.md b/iac/modules/functions-container-app/README.md index 63533d6..51a9972 100644 --- a/iac/modules/functions-container-app/README.md +++ b/iac/modules/functions-container-app/README.md @@ -33,6 +33,7 @@ composition; this module does not provision them directly. | [ai\_search\_endpoint](#input\_ai\_search\_endpoint) | AI Search service endpoint URI. | `string` | n/a | yes | | [ai\_search\_index\_name](#input\_ai\_search\_index\_name) | AI Search index name (typically `registry-entities-v1`). | `string` | n/a | yes | | [app\_insights\_connection\_string\_kv\_secret\_uri](#input\_app\_insights\_connection\_string\_kv\_secret\_uri) | Key Vault secret URI exposing the App Insights connection string. 
Mirrors the spec-005 hybrid AI ingestion pattern. | `string` | n/a | yes | +| [azure\_webjobs\_storage\_account\_name](#input\_azure\_webjobs\_storage\_account\_name) | Storage account name backing `AzureWebJobsStorage` for the Functions
runtime. Even though the indexer's only trigger is the Cosmos
change-feed (which uses Cosmos's lease container for state), the
Functions runtime still expects this connection at startup and
flags the host as unhealthy when absent. We supply an AAD-only
storage account here; the workload UAMI is granted Storage Blob
Data Owner on it by the composition (no shared keys, no connection
strings — consistent with the project's managed-identity stance). | `string` | n/a | yes | | [container\_apps\_environment\_id](#input\_container\_apps\_environment\_id) | Container Apps Environment resource id (from spec 005). | `string` | n/a | yes | | [container\_image](#input\_container\_image) | Fully-qualified container image reference (registry/name:tag). | `string` | n/a | yes | | [cosmos\_account\_endpoint](#input\_cosmos\_account\_endpoint) | Cosmos DB account endpoint URI (e.g., https://.documents.azure.com:443/). | `string` | n/a | yes | diff --git a/iac/modules/functions-container-app/main.tf b/iac/modules/functions-container-app/main.tf index 6e6fcc5..528a967 100644 --- a/iac/modules/functions-container-app/main.tf +++ b/iac/modules/functions-container-app/main.tf @@ -93,6 +93,27 @@ resource "azurerm_container_app" "indexer" { name = "FUNCTIONS_WORKER_RUNTIME" value = "dotnet-isolated" } + + # AzureWebJobsStorage — AAD-only. The Functions runtime expects + # this connection at startup even when the only trigger (Cosmos + # change-feed) doesn't need it. Without it the runtime reports + # the host as unhealthy and the indexer container logs spam + # "Unable to create client for AzureWebJobsStorage" every 30s. + # No connection strings, no shared keys — the workload UAMI is + # granted Storage Blob Data Owner on the account by the env + # composition. 
+ env { + name = "AzureWebJobsStorage__accountName" + value = var.azure_webjobs_storage_account_name + } + env { + name = "AzureWebJobsStorage__credential" + value = "managedidentity" + } + env { + name = "AzureWebJobsStorage__clientId" + value = var.workload_uami_client_id + } } } diff --git a/iac/modules/functions-container-app/variables.tf b/iac/modules/functions-container-app/variables.tf index f397b18..42d8031 100644 --- a/iac/modules/functions-container-app/variables.tf +++ b/iac/modules/functions-container-app/variables.tf @@ -94,6 +94,27 @@ variable "memory" { default = "1Gi" } +variable "azure_webjobs_storage_account_name" { + description = <<-EOT + Storage account name backing `AzureWebJobsStorage` for the Functions + runtime. Even though the indexer's only trigger is the Cosmos + change-feed (which uses Cosmos's lease container for state), the + Functions runtime still expects this connection at startup and + flags the host as unhealthy when absent. We supply an AAD-only + storage account here; the workload UAMI is granted Storage Blob + Data Owner on it by the composition (no shared keys, no connection + strings — consistent with the project's managed-identity stance). + EOT + type = string + + # Fail at plan time on a malformed name: within this module the value is + # only injected as an env var, so nothing here would reject it before runtime. + validation { + condition = can(regex("^[a-z0-9]{3,24}$", var.azure_webjobs_storage_account_name)) + error_message = "The azure_webjobs_storage_account_name value must be 3-24 lowercase letters or digits." + } +} + variable "tags" { description = "Resource tags." type = map(string)