From de5335758a778bf62ed124e0115614c085aa3320 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 14:18:07 +0100 Subject: [PATCH 01/16] chore: rm deploy job from infra --- .github/workflows/deploy_dev_infra.yml | 41 ------------------------- .github/workflows/deploy_prod_infra.yml | 15 --------- AGENTS.md | 2 +- README.md | 2 +- infra/README.md | 6 ++-- 5 files changed, 4 insertions(+), 62 deletions(-) diff --git a/.github/workflows/deploy_dev_infra.yml b/.github/workflows/deploy_dev_infra.yml index a9c355c..a553fec 100644 --- a/.github/workflows/deploy_dev_infra.yml +++ b/.github/workflows/deploy_dev_infra.yml @@ -32,44 +32,3 @@ jobs: lambda_matrix: ${{ needs.setup.outputs.lambda_dirs }} bootstrap_image_uri: ${{ needs.code.outputs.bootstrap_image_uri }} service_matrix: ${{ needs.setup.outputs.ecs_service_dirs }} - - build: - name: Build Artifacts - uses: ./.github/workflows/build.yml - needs: - - code - - setup - with: - environment: dev - lambda_version: ${{ github.sha }} - frontend_version: ${{ github.sha }} - ecs_version: ${{ github.sha }} - lambda_matrix: ${{ needs.setup.outputs.lambda_dirs }} - ecs_matrix: ${{ needs.setup.outputs.container_dirs }} - - get_build: - name: Resolve Build Outputs - needs: build - uses: ./.github/workflows/build_get.yml - with: - environment: dev - lambda_version: ${{ github.sha }} - frontend_version: ${{ github.sha }} - ecs_version: ${{ github.sha }} - - deploy: - name: Deploy Code - uses: ./.github/workflows/deploy.yml - needs: - - setup - - build - - get_build - - infra - with: - environment: dev - lambda_version: ${{ needs.build.outputs.lambda_version }} - frontend_version: ${{ needs.build.outputs.frontend_version }} - code_bucket: ${{ needs.get_build.outputs.code_bucket }} - lambda_matrix: ${{ needs.setup.outputs.lambda_dirs }} - task_matrix: ${{ needs.get_build.outputs.ecs_task_matrix }} - ecs_image_uris: ${{ needs.get_build.outputs.ecs_image_uris }} diff --git a/.github/workflows/deploy_prod_infra.yml 
b/.github/workflows/deploy_prod_infra.yml index afffbe5..b0ade0e 100644 --- a/.github/workflows/deploy_prod_infra.yml +++ b/.github/workflows/deploy_prod_infra.yml @@ -29,18 +29,3 @@ jobs: lambda_matrix: ${{ needs.get_build.outputs.lambda_version_files }} bootstrap_image_uri: ${{ needs.get_build.outputs.bootstrap_image_uri }} service_matrix: ${{ needs.get_build.outputs.ecs_service_matrix }} - - deploy: - name: Deploy Code - uses: ./.github/workflows/deploy.yml - needs: - - get_build - - infra # this is only to ensure infra runs before deploy no dependencies on infra outputs i.e. infra is managed separately - with: - environment: prod - lambda_version: ${{ needs.get_build.outputs.lambda_version }} - frontend_version: ${{ needs.get_build.outputs.frontend_version }} - code_bucket: ${{ needs.get_build.outputs.code_bucket }} - lambda_matrix: ${{ needs.get_build.outputs.lambda_version_files }} - task_matrix: ${{ needs.get_build.outputs.ecs_task_matrix }} - ecs_image_uris: ${{ needs.get_build.outputs.ecs_image_uris }} diff --git a/AGENTS.md b/AGENTS.md index 642fb66..7bc3944 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -86,7 +86,7 @@ When changing CI workflows or Terraform module dependencies, check dependency be - check required Terraform input variables on destroy paths as well as apply paths; destroy can still fail before resource deletion if required vars are unset - make sure every referenced `needs..outputs.*` value is actually in scope for that job - make sure matrix values match the expected naming contract for the workflow, module, or path being used -- for `*_infra` deploy wrappers, verify the infra workflow receives the directory-based infra matrices it needs, while deploy workflows receive the artifact-based matrices and image URIs they need +- for `*_infra` wrappers, verify they stop at infrastructure apply and do not also run the reusable `deploy.yml` code rollout - for prod wrappers in this repo, remember that shared artifact resources come from `ci`, while 
deploy target resources are still in `prod` - prefer making modules tolerant of unnecessary upstream state dependencies where possible - do not change CI ordering blindly; first check whether the real issue is an avoidable cross-stack dependency diff --git a/README.md b/README.md index c56923c..64ca791 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Terragrunt also provides a shared default ECR repository name to ECS task module - override it in `infra/live//environment_vars.hcl` only if the repository naming diverges from that convention - the concrete ECS worker task wrapper defaults `local_tunnel = false` and `xray_enabled = false` unless you explicitly set them -The reusable deploy workflows follow the same split: `prod` `*_code` and `*_infra` wrappers read shared artifact resources from `ci`, but `*_infra` still applies `prod` infrastructure stacks using the repo's directory-derived service and lambda matrices. +The reusable deploy workflows follow the same split: `prod` `*_code` and `*_infra` wrappers read shared artifact resources from `ci`, but `*_infra` only applies `prod` infrastructure stacks using the repo's directory-derived service and lambda matrices. For `*_code` release deploys, pass explicit release versions for each runtime you want to roll out. In particular, ECS code deploys should provide an `ecs_version` rather than relying on a Lambda-version fallback. 
diff --git a/infra/README.md b/infra/README.md index 838158b..41dc582 100644 --- a/infra/README.md +++ b/infra/README.md @@ -73,10 +73,8 @@ stores state at: - infra workflows create or update infrastructure stacks - build workflows produce Lambda zips and container images -- `*_infra` deploy wrappers need two kinds of reusable-workflow inputs: - - directory-derived infra matrices for stack applies - - artifact-derived versions, task matrices, and image URIs for code deploys -- in `prod`, the wrappers read shared artifact resources from `ci` but still apply service and task stacks in `prod` +- `*_infra` wrappers need the inputs required to apply infra safely, such as directory-derived stack matrices and any artifact-derived bootstrap references +- in `prod`, the `*_infra` wrappers read shared artifact resources from `ci` but only apply service and task stacks in `prod` - deploy workflows: - publish Lambda versions and use Lambda CodeDeploy - register ECS task revisions From d78ad8123d712a7b4aed06fbd14b64fbc868838b Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 14:36:49 +0100 Subject: [PATCH 02/16] feat: blue/green service example --- Dockerfile | 25 +++++- README.md | 15 ++++ containers/api/app.py | 75 ++++++++++++++++++ containers/api/requirements.txt | 1 + infra/README.md | 7 ++ infra/live/dev/aws/service_api/terragrunt.hcl | 7 ++ infra/live/dev/aws/task_api/terragrunt.hcl | 7 ++ .../live/prod/aws/service_api/terragrunt.hcl | 7 ++ infra/live/prod/aws/task_api/terragrunt.hcl | 7 ++ infra/modules/aws/service_api/README.md | 29 +++++++ infra/modules/aws/service_api/data.tf | 59 ++++++++++++++ infra/modules/aws/service_api/main.tf | 42 ++++++++++ infra/modules/aws/service_api/outputs.tf | 19 +++++ infra/modules/aws/service_api/variables.tf | 77 +++++++++++++++++++ infra/modules/aws/task_api/README.md | 22 ++++++ infra/modules/aws/task_api/main.tf | 25 ++++++ infra/modules/aws/task_api/outputs.tf | 15 ++++ infra/modules/aws/task_api/variables.tf | 
64 +++++++++++++++ justfile | 1 + 19 files changed, 500 insertions(+), 4 deletions(-) create mode 100644 containers/api/app.py create mode 100644 containers/api/requirements.txt create mode 100644 infra/live/dev/aws/service_api/terragrunt.hcl create mode 100644 infra/live/dev/aws/task_api/terragrunt.hcl create mode 100644 infra/live/prod/aws/service_api/terragrunt.hcl create mode 100644 infra/live/prod/aws/task_api/terragrunt.hcl create mode 100644 infra/modules/aws/service_api/README.md create mode 100644 infra/modules/aws/service_api/data.tf create mode 100644 infra/modules/aws/service_api/main.tf create mode 100644 infra/modules/aws/service_api/outputs.tf create mode 100644 infra/modules/aws/service_api/variables.tf create mode 100644 infra/modules/aws/task_api/README.md create mode 100644 infra/modules/aws/task_api/main.tf create mode 100644 infra/modules/aws/task_api/outputs.tf create mode 100644 infra/modules/aws/task_api/variables.tf diff --git a/Dockerfile b/Dockerfile index 74508ac..9038ba7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,31 @@ +ARG SERVICE + FROM python:3.12-slim AS python-base WORKDIR /usr/app -COPY containers/worker/requirements.txt /tmp/requirements-worker.txt -RUN pip install --no-cache-dir -r /tmp/requirements-worker.txt +FROM python-base AS service-base + +ARG SERVICE + +COPY containers/${SERVICE}/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt + + +FROM service-base AS worker + +ARG SERVICE + +COPY containers/${SERVICE}/app.py /usr/app/app.py + +CMD ["python", "-u", "app.py"] + +FROM service-base AS api -FROM python-base AS worker +ARG SERVICE -COPY containers/worker/app.py /usr/app/app.py +COPY containers/${SERVICE}/app.py /usr/app/app.py CMD ["python", "-u", "app.py"] diff --git a/README.md b/README.md index 64ca791..82c1aea 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,12 @@ The repo `network` module also owns the shared internal ALB and shared HTTP API - VPC link - 
internal ALB and target groups +This repo now includes a sample ECS API container service exposed separately from the Lambda API: + +- ECS API path: `/blue-green-api` +- deployment model: ECS CodeDeploy `blue_green` +- stacks: `task_api` and `service_api` + The `api` module is Lambda-specific and plugs the Lambda integration and root routes into that shared API. Terragrunt also provides a shared default ECR repository name to ECS task modules: @@ -45,6 +51,15 @@ The reusable deploy workflows follow the same split: `prod` `*_code` and `*_infr For `*_code` release deploys, pass explicit release versions for each runtime you want to roll out. In particular, ECS code deploys should provide an `ecs_version` rather than relying on a Lambda-version fallback. +## 🧪 example prompts + +Use prompts like these when asking for a new service in this repo: + +- `Add a new ECS service called billing_api exposed on /billing via API Gateway VPC link, with task_billing_api/service_billing_api, canary deploys, and update the docs.` +- `Create a new internal ECS worker called report_worker using task_report_worker/service_report_worker, rolling deploys, and hook it into the existing container build flow.` +- `Add a new Lambda called invoice_sync with its live stacks in dev and prod, wire it into the existing lambda build/deploy workflows, and document the new module contract.` +- `Create a new public Lambda API endpoint for /reports, keep it Lambda-backed rather than ECS, and update the repo docs and workflow expectations.` + ## 🛠️ local plan some infra Given a terragrunt file is found at `infra/live/dev/aws/api/terragrunt.hcl` diff --git a/containers/api/app.py b/containers/api/app.py new file mode 100644 index 0000000..03decbe --- /dev/null +++ b/containers/api/app.py @@ -0,0 +1,75 @@ +import json +import os +import socket +from http.server import BaseHTTPRequestHandler, HTTPServer + + +HOST = "0.0.0.0" +PORT = int(os.getenv("PORT", "80")) +ROOT_PATH = os.getenv("ROOT_PATH", "") 
+SERVICE_NAME = os.getenv("AWS_SERVICE_NAME", "ecs-blue-green-api") +IMAGE = os.getenv("IMAGE", "unknown") + + +def _normalize_root_path(root_path: str) -> str: + if not root_path: + return "" + return root_path if root_path.startswith("/") else f"/{root_path}" + + +ROOT_PATH_PREFIX = _normalize_root_path(ROOT_PATH.rstrip("/")) + + +def route_for(path: str) -> str: + if ROOT_PATH_PREFIX and path.startswith(ROOT_PATH_PREFIX): + trimmed = path[len(ROOT_PATH_PREFIX):] + return trimmed or "/" + return path or "/" + + +class Handler(BaseHTTPRequestHandler): + server_version = "BlueGreenAPI/1.0" + + def _write_json(self, status: int, body: dict) -> None: + encoded = json.dumps(body).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(encoded))) + self.end_headers() + self.wfile.write(encoded) + + def do_GET(self) -> None: # noqa: N802 + route = route_for(self.path.split("?", 1)[0]) + + if route == "/health": + self._write_json(200, {"status": "ok", "service": SERVICE_NAME}) + return + + if route in ("/fail", "/error"): + self._write_json( + 500, + { + "message": "Forced failure for testing", + "service": SERVICE_NAME, + "route": route, + }, + ) + return + + self._write_json( + 200, + { + "message": "Hello from the blue/green ECS API", + "service": SERVICE_NAME, + "hostname": socket.gethostname(), + "image": IMAGE, + "root_path": ROOT_PATH_PREFIX, + "route": route, + }, + ) + + +if __name__ == "__main__": + httpd = HTTPServer((HOST, PORT), Handler) + print(f"Starting {SERVICE_NAME} on {HOST}:{PORT} with root path {ROOT_PATH_PREFIX or '/'}") + httpd.serve_forever() diff --git a/containers/api/requirements.txt b/containers/api/requirements.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/containers/api/requirements.txt @@ -0,0 +1 @@ + diff --git a/infra/README.md b/infra/README.md index 41dc582..1fd9435 100644 --- a/infra/README.md +++ b/infra/README.md @@ -63,6 
+63,13 @@ stores state at: - `service_*` Own the ECS services and, when applicable, CodeDeploy resources. +Current examples include: + +- `task_worker` / `service_worker` + Internal ECS worker service shape. +- `task_api` / `service_api` + ECS API service shape exposed on the shared API Gateway at `/blue-green-api` using `vpc_link` and `blue_green`. + ## Dependency Notes - many modules use `data.terraform_remote_state` to read outputs from other stacks diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl new file mode 100644 index 0000000..d5d10fd --- /dev/null +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -0,0 +1,7 @@ +include { + path = find_in_parent_folders("root.hcl") +} + +terraform { + source = "../../../../modules//aws//service_api" +} diff --git a/infra/live/dev/aws/task_api/terragrunt.hcl b/infra/live/dev/aws/task_api/terragrunt.hcl new file mode 100644 index 0000000..1263aab --- /dev/null +++ b/infra/live/dev/aws/task_api/terragrunt.hcl @@ -0,0 +1,7 @@ +include { + path = find_in_parent_folders("root.hcl") +} + +terraform { + source = "../../../../modules//aws//task_api" +} diff --git a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl new file mode 100644 index 0000000..d5d10fd --- /dev/null +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -0,0 +1,7 @@ +include { + path = find_in_parent_folders("root.hcl") +} + +terraform { + source = "../../../../modules//aws//service_api" +} diff --git a/infra/live/prod/aws/task_api/terragrunt.hcl b/infra/live/prod/aws/task_api/terragrunt.hcl new file mode 100644 index 0000000..1263aab --- /dev/null +++ b/infra/live/prod/aws/task_api/terragrunt.hcl @@ -0,0 +1,7 @@ +include { + path = find_in_parent_folders("root.hcl") +} + +terraform { + source = "../../../../modules//aws//task_api" +} diff --git a/infra/modules/aws/service_api/README.md b/infra/modules/aws/service_api/README.md new file mode 100644 
index 0000000..3382f3c --- /dev/null +++ b/infra/modules/aws/service_api/README.md @@ -0,0 +1,29 @@ +# `service_api` + +Concrete ECS API service wrapper for the blue/green API service. + +## Owns + +- blue/green ECS API service via `_shared/service` +- API Gateway VPC link routing on `/blue-green-api` + +## Dependencies + +- `task_api` remote state +- `cluster`, `network`, and `security` remote state + +## Key behavior + +- exposes the ECS API container on the shared HTTP API Gateway using `connection_type = "vpc_link"` +- uses `deployment_strategy = "blue_green"` +- defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled + +## Key outputs + +- `service_name` +- `cluster_name` +- `codedeploy_app_name` +- `codedeploy_deployment_group_name` +- `container_port` + +This module wires the blue/green ECS API service into the shared API Gateway and ALB infrastructure. diff --git a/infra/modules/aws/service_api/data.tf b/infra/modules/aws/service_api/data.tf new file mode 100644 index 0000000..4e91271 --- /dev/null +++ b/infra/modules/aws/service_api/data.tf @@ -0,0 +1,59 @@ +data "terraform_remote_state" "task_api" { + count = var.bootstrap ? 
0 : 1 + backend = "s3" + + config = { + bucket = var.state_bucket + key = "${var.environment}/aws/task_api/terraform.tfstate" + region = var.aws_region + } +} + +data "terraform_remote_state" "network" { + backend = "s3" + + config = { + bucket = var.state_bucket + key = "${var.environment}/aws/network/terraform.tfstate" + region = var.aws_region + } +} + +data "terraform_remote_state" "security" { + backend = "s3" + + config = { + bucket = var.state_bucket + key = "${var.environment}/aws/security/terraform.tfstate" + region = var.aws_region + } +} + +data "terraform_remote_state" "cluster" { + backend = "s3" + + config = { + bucket = var.state_bucket + key = "${var.environment}/aws/cluster/terraform.tfstate" + region = var.aws_region + } +} + +data "aws_vpc" "this" { + filter { + name = "tag:Name" + values = [var.vpc_name] + } +} + +data "aws_subnets" "private" { + filter { + name = "vpc-id" + values = [data.aws_vpc.this.id] + } + + filter { + name = "tag:Name" + values = ["*private*"] + } +} diff --git a/infra/modules/aws/service_api/main.tf b/infra/modules/aws/service_api/main.tf new file mode 100644 index 0000000..4dff3f2 --- /dev/null +++ b/infra/modules/aws/service_api/main.tf @@ -0,0 +1,42 @@ +module "service_api" { + source = "../_shared/service" + + service_name = var.service_name + task_definition_arn = var.bootstrap ? 
"" : data.terraform_remote_state.task_api[0].outputs.task_definition_arn + container_port = var.container_port + root_path = var.root_path + connection_type = var.connection_type + + aws_region = var.aws_region + vpc_id = data.aws_vpc.this.id + private_subnet_ids = data.aws_subnets.private.ids + + cluster_id = data.terraform_remote_state.cluster.outputs.cluster_id + cluster_name = data.terraform_remote_state.cluster.outputs.cluster_name + ecs_security_group_id = data.terraform_remote_state.security.outputs.ecs_sg + + default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn + default_http_listener_arn = data.terraform_remote_state.network.outputs.default_http_listener_arn + load_balancer_arn_suffix = data.terraform_remote_state.network.outputs.load_balancer_arn_suffix + target_group_arn_suffix = data.terraform_remote_state.network.outputs.target_group_arn_suffix + + api_id = data.terraform_remote_state.network.outputs.api_id + vpc_link_id = data.terraform_remote_state.network.outputs.vpc_link_id + internal_invoke_url = data.terraform_remote_state.network.outputs.internal_invoke_url + api_invoke_url = data.terraform_remote_state.network.outputs.api_invoke_url + + bootstrap = var.bootstrap + bootstrap_image_uri = var.bootstrap_image_uri + xray_enabled = var.xray_enabled + local_tunnel = var.local_tunnel + wait_for_steady_state = var.wait_for_steady_state + + desired_task_count = 1 + deployment_strategy = "blue_green" + codedeploy_alarm_names = [] + additional_security_group_ids = [] + + scaling_strategy = { + max_scaled_task_count = 2 + } +} diff --git a/infra/modules/aws/service_api/outputs.tf b/infra/modules/aws/service_api/outputs.tf new file mode 100644 index 0000000..d2be372 --- /dev/null +++ b/infra/modules/aws/service_api/outputs.tf @@ -0,0 +1,19 @@ +output "service_name" { + value = module.service_api.service_name +} + +output "cluster_name" { + value = data.terraform_remote_state.cluster.outputs.cluster_name +} + +output 
"codedeploy_app_name" { + value = module.service_api.codedeploy_app_name +} + +output "codedeploy_deployment_group_name" { + value = module.service_api.codedeploy_deployment_group_name +} + +output "container_port" { + value = var.container_port +} diff --git a/infra/modules/aws/service_api/variables.tf b/infra/modules/aws/service_api/variables.tf new file mode 100644 index 0000000..4dd8464 --- /dev/null +++ b/infra/modules/aws/service_api/variables.tf @@ -0,0 +1,77 @@ +### start of static vars set in root.hcl ### +variable "state_bucket" { + type = string +} + +variable "environment" { + type = string +} + +variable "aws_region" { + type = string +} + +variable "project_name" { + type = string +} +### end of static vars set in root.hcl ### + +variable "service_name" { + type = string + default = "ecs-blue-green-api" +} + +variable "vpc_name" { + type = string +} + +variable "container_port" { + type = number + default = 80 +} + +variable "root_path" { + description = "The path to serve the service from. / is for default /example_service is for subpath" + default = "blue-green-api" + type = string +} + +variable "connection_type" { + description = "Type of connectivity/integration to use for the service (choices: internal, internal_dns, vpc_link)." + type = string + default = "vpc_link" + validation { + condition = can(regex("^(internal|internal_dns|vpc_link)$", var.connection_type)) + error_message = "connection_type must be one of: internal, internal_dns, vpc_link." + } +} + +variable "local_tunnel" { + type = bool + default = false +} + +variable "xray_enabled" { + type = bool + default = false +} + +variable "wait_for_steady_state" { + type = bool + default = false +} + +variable "bootstrap" { + type = bool + default = false +} + +variable "bootstrap_image_uri" { + type = string + default = "" + + validation { + condition = !var.bootstrap || var.bootstrap_image_uri != "" + error_message = "bootstrap_image_uri must be set when bootstrap is true." 
+ } +} diff --git a/infra/modules/aws/task_api/README.md b/infra/modules/aws/task_api/README.md new file mode 100644 index 0000000..947f922 --- /dev/null +++ b/infra/modules/aws/task_api/README.md @@ -0,0 +1,22 @@ +# `task_api` + +Concrete ECS API task wrapper for the blue/green API service. + +## Owns + +- blue/green ECS API task definition via `_shared/task` + +## Key behavior + +- runs the `containers/api` image +- publishes API task revisions for ECS deploys +- exposes the service on the `/blue-green-api` root path +- defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled + +## Key outputs + +- `task_definition_arn` +- `service_name` +- log group name + +This module is the image-driven deployment unit for the ECS blue/green API service. diff --git a/infra/modules/aws/task_api/main.tf b/infra/modules/aws/task_api/main.tf new file mode 100644 index 0000000..d89317e --- /dev/null +++ b/infra/modules/aws/task_api/main.tf @@ -0,0 +1,25 @@ +module "task_api" { + source = "../_shared/task" + + project_name = var.project_name + ecr_repository_name = var.ecr_repository_name + aws_region = var.aws_region + container_port = var.container_port + cpu = var.cpu + memory = var.memory + + image_uri = var.image_uri + debug_image_uri = var.debug_image_uri + aws_otel_collector_image_uri = var.aws_otel_collector_image_uri + otel_sampling_percentage = var.otel_sampling_percentage + + local_tunnel = var.local_tunnel + xray_enabled = var.xray_enabled + + additional_env_vars = [] + additional_runtime_policy_arns = [] + + root_path = "blue-green-api" + service_name = "ecs-blue-green-api" + command = ["python", "-u", "app.py"] +} diff --git a/infra/modules/aws/task_api/outputs.tf b/infra/modules/aws/task_api/outputs.tf new file mode 100644 index 0000000..b70f6c5 --- /dev/null +++ b/infra/modules/aws/task_api/outputs.tf @@ -0,0 +1,15 @@ +output "task_definition_arn" { + value = module.task_api.task_definition_arn +} + +output "cloudwatch_log_group" { + value = 
module.task_api.cloudwatch_log_group +} + +output "root_path" { + value = module.task_api.root_path +} + +output "service_name" { + value = module.task_api.service_name +} diff --git a/infra/modules/aws/task_api/variables.tf b/infra/modules/aws/task_api/variables.tf new file mode 100644 index 0000000..c4bd09a --- /dev/null +++ b/infra/modules/aws/task_api/variables.tf @@ -0,0 +1,64 @@ +### start of static vars set in root.hcl ### +variable "state_bucket" { + type = string +} + +variable "environment" { + type = string +} + +variable "aws_region" { + type = string +} + +variable "project_name" { + type = string +} + +variable "ecr_repository_name" { + type = string +} +### end of static vars set in root.hcl ### + +variable "container_port" { + type = number + default = 80 +} + +variable "cpu" { + type = number + default = 256 +} + +variable "memory" { + type = number + default = 512 +} + +variable "image_uri" { + type = string +} + +variable "aws_otel_collector_image_uri" { + type = string +} + +variable "otel_sampling_percentage" { + description = "Percentage of requests to send to x-ray" + type = string + default = 10.0 +} + +variable "debug_image_uri" { + type = string +} + +variable "local_tunnel" { + type = bool + default = false +} + +variable "xray_enabled" { + type = bool + default = false +} diff --git a/justfile b/justfile index c57e545..806493e 100644 --- a/justfile +++ b/justfile @@ -231,6 +231,7 @@ docker-build: docker build \ --file "{{PROJECT_DIR}}/Dockerfile" \ + --build-arg "SERVICE=$CONTAINER_NAME" \ --target "$CONTAINER_NAME" \ -t "$TAG" \ "{{PROJECT_DIR}}" From 0508eae03095ed76eaa6baf84adcaef4e992f5d2 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 14:41:38 +0100 Subject: [PATCH 03/16] fix: worker entrypoint --- infra/modules/aws/task_worker/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 7d50919..95d90c0 100644 --- 
a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -21,5 +21,5 @@ module "task_consumer" { root_path = "" service_name = "ecs-worker" - command = ["python", "-u", "consumer/app.py"] + command = ["python", "-u", "app.py"] } From 119e2fc793fc4ca140bb07f35057b829d9bc3e07 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 15:00:23 +0100 Subject: [PATCH 04/16] chore: serve ecs to api --- README.md | 5 ++++- containers/api/app.py | 2 +- infra/README.md | 2 +- infra/modules/aws/api/README.md | 3 ++- infra/modules/aws/frontend/README.md | 10 ++++++++++ infra/modules/aws/service_api/README.md | 8 ++++---- infra/modules/aws/service_api/variables.tf | 4 ++-- infra/modules/aws/task_api/README.md | 8 ++++---- infra/modules/aws/task_api/main.tf | 4 ++-- 9 files changed, 30 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 82c1aea..cb93dfa 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,10 @@ The repo `network` module also owns the shared internal ALB and shared HTTP API This repo now includes a sample ECS API container service exposed separately from the Lambda API: -- ECS API path: `/blue-green-api` +- public Lambda path via CloudFront: `/api/*` +- public ECS path via CloudFront: `/api/ecs/*` +- API Gateway Lambda route namespace: `/*` +- API Gateway ECS route namespace: `/ecs/*` - deployment model: ECS CodeDeploy `blue_green` - stacks: `task_api` and `service_api` diff --git a/containers/api/app.py b/containers/api/app.py index 03decbe..17e0664 100644 --- a/containers/api/app.py +++ b/containers/api/app.py @@ -7,7 +7,7 @@ HOST = "0.0.0.0" PORT = int(os.getenv("PORT", "80")) ROOT_PATH = os.getenv("ROOT_PATH", "") -SERVICE_NAME = os.getenv("AWS_SERVICE_NAME", "ecs-blue-green-api") +SERVICE_NAME = os.getenv("AWS_SERVICE_NAME", "ecs-service-api") IMAGE = os.getenv("IMAGE", "unknown") diff --git a/infra/README.md b/infra/README.md index 1fd9435..d513386 100644 --- a/infra/README.md +++ b/infra/README.md 
@@ -68,7 +68,7 @@ Current examples include: - `task_worker` / `service_worker` Internal ECS worker service shape. - `task_api` / `service_api` - ECS API service shape exposed on the shared API Gateway at `/blue-green-api` using `vpc_link` and `blue_green`. + ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. ## Dependency Notes diff --git a/infra/modules/aws/api/README.md b/infra/modules/aws/api/README.md index 41918fe..4b12c18 100644 --- a/infra/modules/aws/api/README.md +++ b/infra/modules/aws/api/README.md @@ -6,7 +6,7 @@ Lambda-backed public HTTP API module. - Lambda API function via `_shared/lambda` - Lambda proxy integration into the shared HTTP API -- root and proxy routes +- root and proxy routes on the shared API - API 5xx CloudWatch alarm ## Dependencies @@ -22,3 +22,4 @@ Lambda-backed public HTTP API module. - Lambda function and alias names This module is Lambda-specific. The shared API surface now lives in `network`. +When accessed through the frontend CloudFront distribution, the public Lambda path is `/api/*` because CloudFront strips the leading `/api` prefix before forwarding to API Gateway. diff --git a/infra/modules/aws/frontend/README.md b/infra/modules/aws/frontend/README.md index 04c1cf8..19eb7ab 100644 --- a/infra/modules/aws/frontend/README.md +++ b/infra/modules/aws/frontend/README.md @@ -6,6 +6,16 @@ Static frontend hosting module. 
- website bucket and distribution resources - deployment destination for built frontend assets +- path-based forwarding of `/api/*` requests to the shared API origin + +## Routing behavior + +- `/api/*` + forwarded to API Gateway and stripped to `/*` for the Lambda-backed API +- `/api/ecs/*` + forwarded to API Gateway and stripped to `/ecs/*` +- all other paths + served from the frontend bucket with SPA routing ## Key outputs diff --git a/infra/modules/aws/service_api/README.md b/infra/modules/aws/service_api/README.md index 3382f3c..f3a6c5c 100644 --- a/infra/modules/aws/service_api/README.md +++ b/infra/modules/aws/service_api/README.md @@ -1,11 +1,11 @@ # `service_api` -Concrete ECS API service wrapper for the blue/green API service. +Concrete ECS API service wrapper for the sample API service. ## Owns -- blue/green ECS API service via `_shared/service` -- API Gateway VPC link routing on `/blue-green-api` +- sample ECS API service via `_shared/service` +- API Gateway VPC link routing on `/ecs` ## Dependencies @@ -26,4 +26,4 @@ Concrete ECS API service wrapper for the blue/green API service. - `codedeploy_deployment_group_name` - `container_port` -This module wires the blue/green ECS API service into the shared API Gateway and ALB infrastructure. +This module wires the sample ECS API service into the shared API Gateway and ALB infrastructure. diff --git a/infra/modules/aws/service_api/variables.tf b/infra/modules/aws/service_api/variables.tf index 4dd8464..7facdd2 100644 --- a/infra/modules/aws/service_api/variables.tf +++ b/infra/modules/aws/service_api/variables.tf @@ -18,7 +18,7 @@ variable "project_name" { variable "service_name" { type = string - default = "ecs-blue-green-api" + default = "ecs-service-api" } variable "vpc_name" { @@ -32,7 +32,7 @@ variable "container_port" { variable "root_path" { description = "The path to serve the service from. 
/ is for default /example_service is for subpath" - default = "blue-green-api" + default = "ecs" type = string } diff --git a/infra/modules/aws/task_api/README.md b/infra/modules/aws/task_api/README.md index 947f922..2014493 100644 --- a/infra/modules/aws/task_api/README.md +++ b/infra/modules/aws/task_api/README.md @@ -1,16 +1,16 @@ # `task_api` -Concrete ECS API task wrapper for the blue/green API service. +Concrete ECS API task wrapper for the sample API service. ## Owns -- blue/green ECS API task definition via `_shared/task` +- sample ECS API task definition via `_shared/task` ## Key behavior - runs the `containers/api` image - publishes API task revisions for ECS deploys -- exposes the service on the `/blue-green-api` root path +- exposes the service on the `/ecs` root path - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs @@ -19,4 +19,4 @@ Concrete ECS API task wrapper for the blue/green API service. - `service_name` - log group name -This module is the image-driven deployment unit for the ECS blue/green API service. +This module is the image-driven deployment unit for the sample ECS API service. 
diff --git a/infra/modules/aws/task_api/main.tf b/infra/modules/aws/task_api/main.tf index d89317e..fd03b27 100644 --- a/infra/modules/aws/task_api/main.tf +++ b/infra/modules/aws/task_api/main.tf @@ -19,7 +19,7 @@ module "task_api" { additional_env_vars = [] additional_runtime_policy_arns = [] - root_path = "blue-green-api" - service_name = "ecs-blue-green-api" + root_path = "ecs" + service_name = "ecs-service-api" command = ["python", "-u", "app.py"] } From 08fb18f74c8a6ac44b63545fcca9864e4c16a1d8 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 15:05:02 +0100 Subject: [PATCH 05/16] chore: display ecs response in frontend --- README.md | 1 + frontend/src/App.jsx | 51 ++++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index cb93dfa..015a80c 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ This repo now includes a sample ECS API container service exposed separately fro - API Gateway ECS route namespace: `/ecs/*` - deployment model: ECS CodeDeploy `blue_green` - stacks: `task_api` and `service_api` +- the sample frontend calls both backends and renders both responses so the path split is visible in the UI The `api` module is Lambda-specific and plugs the Lambda integration and root routes into that shared API. 
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 6d33c84..a03c28d 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,33 +1,48 @@ import { useEffect, useState } from 'react' export default function App() { - const [data, setData] = useState(null) - const [error, setError] = useState(null) + const [lambdaData, setLambdaData] = useState(null) + const [lambdaError, setLambdaError] = useState(null) + const [ecsData, setEcsData] = useState(null) + const [ecsError, setEcsError] = useState(null) useEffect(() => { fetch('/api/') .then((r) => r.json()) - .then(setData) - .catch(setError) + .then(setLambdaData) + .catch(setLambdaError) + + fetch('/api/ecs/') + .then((r) => r.json()) + .then(setEcsData) + .catch(setEcsError) }, []) + const renderTable = (data) => ( + + + {Object.entries(data).map(([key, value]) => ( + + + + + ))} + +
{key}{String(value)}
+ ) + return (

Serverless App

- {error &&

Error: {String(error)}

} - {data && ( - - - {Object.entries(data).map(([k, v]) => ( - - - - - ))} - -
{k}{String(v)}
- )} - {!data && !error &&

Loading...

} +

Lambda Response

+ {lambdaError &&

Error: {String(lambdaError)}

} + {lambdaData && renderTable(lambdaData)} + {!lambdaData && !lambdaError &&

Loading Lambda response...

} + +

ECS Response

+ {ecsError &&

Error: {String(ecsError)}

} + {ecsData && renderTable(ecsData)} + {!ecsData && !ecsError &&

Loading ECS response...

}
) } From 05d208667c70b3b27c38e6d24c288768c35a1f86 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 15:35:53 +0100 Subject: [PATCH 06/16] chore: bootstrap frontend file --- README.md | 2 + infra/modules/aws/frontend/README.md | 3 ++ .../modules/aws/frontend/bootstrap/index.html | 44 +++++++++++++++++++ infra/modules/aws/frontend/main.tf | 8 ++++ 4 files changed, 57 insertions(+) create mode 100644 infra/modules/aws/frontend/bootstrap/index.html diff --git a/README.md b/README.md index 015a80c..d84264b 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ This repo now includes a sample ECS API container service exposed separately fro The `api` module is Lambda-specific and plugs the Lambda integration and root routes into that shared API. +The frontend infra module also uploads a bootstrap `index.html` during infra apply so CloudFront serves a placeholder page before the built frontend assets are deployed. + Terragrunt also provides a shared default ECR repository name to ECS task modules: - shared artifact base: `dev -> ---dev`, otherwise `---ci` diff --git a/infra/modules/aws/frontend/README.md b/infra/modules/aws/frontend/README.md index 19eb7ab..c94ce3c 100644 --- a/infra/modules/aws/frontend/README.md +++ b/infra/modules/aws/frontend/README.md @@ -5,6 +5,7 @@ Static frontend hosting module. ## Owns - website bucket and distribution resources +- bootstrap `index.html` object for first-time infra deploys - deployment destination for built frontend assets - path-based forwarding of `/api/*` requests to the shared API origin @@ -23,3 +24,5 @@ Static frontend hosting module. - CloudFront distribution id Used by the frontend build and deploy workflow path. + +The Terraform module uploads a bootstrap `index.html` so the distribution serves a valid page before the built frontend assets are published. Later frontend deploys replace that object with the real app bundle output. 
diff --git a/infra/modules/aws/frontend/bootstrap/index.html b/infra/modules/aws/frontend/bootstrap/index.html new file mode 100644 index 0000000..63d7a9d --- /dev/null +++ b/infra/modules/aws/frontend/bootstrap/index.html @@ -0,0 +1,44 @@ + + + + + + Frontend Bootstrap + + + +
+

Frontend Bootstrap

+

The frontend infrastructure is up, but the built frontend assets have not been deployed yet.

+

Deploy the frontend bundle to replace this bootstrap page and serve the full app from index.html.

+
+ + diff --git a/infra/modules/aws/frontend/main.tf b/infra/modules/aws/frontend/main.tf index ef786fd..24b2401 100644 --- a/infra/modules/aws/frontend/main.tf +++ b/infra/modules/aws/frontend/main.tf @@ -25,6 +25,14 @@ resource "aws_s3_bucket_policy" "frontend" { policy = data.aws_iam_policy_document.frontend_bucket_policy.json } +resource "aws_s3_object" "bootstrap_index" { + bucket = aws_s3_bucket.frontend.id + key = local.root_file + source = "${path.module}/bootstrap/index.html" + etag = filemd5("${path.module}/bootstrap/index.html") + content_type = "text/html; charset=utf-8" +} + resource "aws_cloudfront_function" "spa_routing" { name = "${local.name}-spa-routing" runtime = "cloudfront-js-2.0" From 08ac418fb212ce43a66bc91a61bab450bc48ddb0 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 15:45:46 +0100 Subject: [PATCH 07/16] chore: rename consumer = worker --- infra/modules/aws/lambda_worker/README.md | 2 ++ infra/modules/aws/lambda_worker/outputs.tf | 8 ++++++++ infra/modules/aws/service_worker/README.md | 2 ++ infra/modules/aws/service_worker/main.tf | 16 ++++++++-------- infra/modules/aws/service_worker/outputs.tf | 6 +++--- infra/modules/aws/task_worker/README.md | 3 ++- infra/modules/aws/task_worker/data.tf | 9 +++++++++ infra/modules/aws/task_worker/main.tf | 13 ++++++++++--- infra/modules/aws/task_worker/outputs.tf | 8 ++++---- 9 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 infra/modules/aws/task_worker/data.tf diff --git a/infra/modules/aws/lambda_worker/README.md b/infra/modules/aws/lambda_worker/README.md index 7e1b868..2116fcc 100644 --- a/infra/modules/aws/lambda_worker/README.md +++ b/infra/modules/aws/lambda_worker/README.md @@ -10,6 +10,8 @@ Worker Lambda wrapper module. 
## Key outputs - Lambda function and alias names +- queue name and queue URLs +- SQS read policy ARN - queue URLs - log group diff --git a/infra/modules/aws/lambda_worker/outputs.tf b/infra/modules/aws/lambda_worker/outputs.tf index 0b31175..7f21284 100644 --- a/infra/modules/aws/lambda_worker/outputs.tf +++ b/infra/modules/aws/lambda_worker/outputs.tf @@ -18,6 +18,14 @@ output "sqs_queue_url" { value = module.sqs_queue.sqs_queue_url } +output "sqs_queue_name" { + value = module.sqs_queue.sqs_queue_name +} + +output "sqs_queue_read_policy_arn" { + value = module.sqs_queue.sqs_queue_read_policy_arn +} + output "dead_letter_queue_url" { value = module.sqs_queue.dead_letter_queue_url } diff --git a/infra/modules/aws/service_worker/README.md b/infra/modules/aws/service_worker/README.md index a657418..b405a76 100644 --- a/infra/modules/aws/service_worker/README.md +++ b/infra/modules/aws/service_worker/README.md @@ -20,3 +20,5 @@ Concrete ECS worker service wrapper. - `container_port` This module wires the worker-specific service onto the shared ECS service behavior. + +It also uses the shared worker SQS queue name from `lambda_worker` for ECS service autoscaling. 
diff --git a/infra/modules/aws/service_worker/main.tf b/infra/modules/aws/service_worker/main.tf index 48f7bb3..e79647e 100644 --- a/infra/modules/aws/service_worker/main.tf +++ b/infra/modules/aws/service_worker/main.tf @@ -1,4 +1,4 @@ -module "service_consumer" { +module "service_worker" { source = "../_shared/service" service_name = var.service_name @@ -35,13 +35,13 @@ module "service_consumer" { scaling_strategy = { max_scaled_task_count = 4 sqs = { - scale_out_threshold = 10 # Start scaling at 10 msgs avg - scale_in_threshold = 2 # Scale in below 2 msgs avg - scale_out_adjustment = 2 # Add 2 tasks at once - scale_in_adjustment = 1 # Remove 1 task - cooldown_out = 60 # 1min cooldown (more stable) - cooldown_in = 300 # 5min cooldown (prevent flapping) - queue_name = "tbc" # SQS queue name to monitor for scaling + scale_out_threshold = 10 # Start scaling at 10 msgs avg + scale_in_threshold = 2 # Scale in below 2 msgs avg + scale_out_adjustment = 2 # Add 2 tasks at once + scale_in_adjustment = 1 # Remove 1 task + cooldown_out = 60 # 1min cooldown (more stable) + cooldown_in = 300 # 5min cooldown (prevent flapping) + queue_name = data.terraform_remote_state.lambda_worker.outputs.sqs_queue_name } } } diff --git a/infra/modules/aws/service_worker/outputs.tf b/infra/modules/aws/service_worker/outputs.tf index 1eb8524..38a97a9 100644 --- a/infra/modules/aws/service_worker/outputs.tf +++ b/infra/modules/aws/service_worker/outputs.tf @@ -1,5 +1,5 @@ output "service_name" { - value = module.service_consumer.service_name + value = module.service_worker.service_name } output "cluster_name" { @@ -7,11 +7,11 @@ output "cluster_name" { } output "codedeploy_app_name" { - value = module.service_consumer.codedeploy_app_name + value = module.service_worker.codedeploy_app_name } output "codedeploy_deployment_group_name" { - value = module.service_consumer.codedeploy_deployment_group_name + value = module.service_worker.codedeploy_deployment_group_name } output "container_port" { 
diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index 6b43505..254a2e8 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -8,9 +8,10 @@ Concrete ECS worker task wrapper. ## Key behavior -- runs `python -u consumer/app.py` +- runs `python -u app.py` - publishes worker task revisions for ECS deploys - uses the shared ECR repository named by `ecr_repository_name` +- reads from the shared worker SQS queue via `AWS_SQS_QUEUE_URL` - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs diff --git a/infra/modules/aws/task_worker/data.tf b/infra/modules/aws/task_worker/data.tf new file mode 100644 index 0000000..a4df307 --- /dev/null +++ b/infra/modules/aws/task_worker/data.tf @@ -0,0 +1,9 @@ +data "terraform_remote_state" "lambda_worker" { + backend = "s3" + + config = { + bucket = var.state_bucket + key = "${var.environment}/aws/lambda_worker/terraform.tfstate" + region = var.aws_region + } +} diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 95d90c0..6ebf08f 100644 --- a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -1,4 +1,4 @@ -module "task_consumer" { +module "task_worker" { source = "../_shared/task" project_name = var.project_name @@ -16,8 +16,15 @@ module "task_consumer" { local_tunnel = var.local_tunnel xray_enabled = var.xray_enabled - additional_env_vars = [] - additional_runtime_policy_arns = [] + additional_env_vars = [ + { + name = "AWS_SQS_QUEUE_URL" + value = data.terraform_remote_state.lambda_worker.outputs.sqs_queue_url + } + ] + additional_runtime_policy_arns = [ + data.terraform_remote_state.lambda_worker.outputs.sqs_queue_read_policy_arn + ] root_path = "" service_name = "ecs-worker" diff --git a/infra/modules/aws/task_worker/outputs.tf b/infra/modules/aws/task_worker/outputs.tf index 8aadc29..952dcd3 100644 --- 
a/infra/modules/aws/task_worker/outputs.tf +++ b/infra/modules/aws/task_worker/outputs.tf @@ -1,15 +1,15 @@ output "task_definition_arn" { - value = module.task_consumer.task_definition_arn + value = module.task_worker.task_definition_arn } output "cloudwatch_log_group" { - value = module.task_consumer.cloudwatch_log_group + value = module.task_worker.cloudwatch_log_group } output "root_path" { - value = module.task_consumer.root_path + value = module.task_worker.root_path } output "service_name" { - value = module.task_consumer.service_name + value = module.task_worker.service_name } From da64b60a4380c61ca69dd1ce8c47172e33bbe87d Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 16:08:20 +0100 Subject: [PATCH 08/16] fix: sqs handle bootstrap --- AGENTS.md | 3 +++ README.md | 3 +++ infra/README.md | 1 + infra/modules/aws/lambda_worker/README.md | 4 ++-- infra/modules/aws/service_worker/README.md | 5 +++-- infra/modules/aws/service_worker/data.tf | 10 ---------- infra/modules/aws/service_worker/locals.tf | 6 ++++++ infra/modules/aws/service_worker/main.tf | 4 ++-- infra/modules/aws/task_worker/README.md | 7 +++++-- infra/modules/aws/task_worker/data.tf | 9 --------- infra/modules/aws/task_worker/local.tf | 4 ++++ infra/modules/aws/task_worker/main.tf | 11 +++++++++-- infra/modules/aws/task_worker/outputs.tf | 12 ++++++++++++ 13 files changed, 50 insertions(+), 29 deletions(-) create mode 100644 infra/modules/aws/service_worker/locals.tf create mode 100644 infra/modules/aws/task_worker/local.tf diff --git a/AGENTS.md b/AGENTS.md index 7bc3944..90635ca 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -83,6 +83,9 @@ When changing CI workflows or Terraform module dependencies, check dependency be - when the same setup or lookup pattern appears in multiple workflows, suggest extracting it into a shared reusable workflow or shared `just` recipe instead of repeating it - check workflow dependency wiring such as `needs`, job outputs, matrix values, and reused 
workflow inputs - watch for `data.terraform_remote_state` dependencies that can fail if another stack has not been created yet or has already been destroyed +- avoid cross-runtime ownership when a resource is really part of one app shape; for example, keep the ECS worker queue with `task_worker` rather than making ECS consume `lambda_worker` state +- when a bootstrap path needs placeholder values, prefer hiding that conditional logic in locals instead of repeating `count`-indexed remote-state references through the module body +- if you do add a genuinely new stack type, update the discovery and lifecycle workflows too: `get_directories.yml`, `infra.yml`, and `destroy.yml` - check required Terraform input variables on destroy paths as well as apply paths; destroy can still fail before resource deletion if required vars are unset - make sure every referenced `needs..outputs.*` value is actually in scope for that job - make sure matrix values match the expected naming contract for the workflow, module, or path being used diff --git a/README.md b/README.md index d84264b..a78d26f 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,9 @@ The reusable deploy workflows follow the same split: `prod` `*_code` and `*_infr For `*_code` release deploys, pass explicit release versions for each runtime you want to roll out. In particular, ECS code deploys should provide an `ecs_version` rather than relying on a Lambda-version fallback. +The ECS worker queue is now owned by `task_worker`, and `service_worker` reads that queue name from `task_worker` remote state. That keeps the ECS worker queue aligned with the worker stack lifecycle without depending on the Lambda worker queue. +For bootstrap service applies, `service_worker` now uses placeholder task and queue values locally rather than spreading `count`-indexed remote-state access through the module. 
+ ## 🧪 example prompts Use prompts like these when asking for a new service in this repo: diff --git a/infra/README.md b/infra/README.md index d513386..bbda4c8 100644 --- a/infra/README.md +++ b/infra/README.md @@ -74,6 +74,7 @@ Current examples include: - many modules use `data.terraform_remote_state` to read outputs from other stacks - because of that, workflow ordering matters for apply, deploy, and destroy +- avoid making one runtime depend on another runtime's state ownership unnecessarily; for example, the ECS worker queue is owned by `task_worker` rather than by `lambda_worker` - some shared infrastructure, such as the landing-zone VPC and tagged private subnets, is discovered with `data` lookups and must already exist ## Deployment Model diff --git a/infra/modules/aws/lambda_worker/README.md b/infra/modules/aws/lambda_worker/README.md index 2116fcc..6082ba0 100644 --- a/infra/modules/aws/lambda_worker/README.md +++ b/infra/modules/aws/lambda_worker/README.md @@ -5,7 +5,7 @@ Worker Lambda wrapper module. ## Owns - worker Lambda via `_shared/lambda` -- worker queue integration via `_shared/sqs` +- Lambda worker queue integration via `_shared/sqs` ## Key outputs @@ -15,4 +15,4 @@ Worker Lambda wrapper module. - queue URLs - log group -This is the concrete worker implementation on top of the shared Lambda primitives. +This is the concrete worker implementation on top of the shared Lambda primitives. Its queue is owned for Lambda worker processing and is no longer the queue used by the ECS worker service. diff --git a/infra/modules/aws/service_worker/README.md b/infra/modules/aws/service_worker/README.md index b405a76..139a8f0 100644 --- a/infra/modules/aws/service_worker/README.md +++ b/infra/modules/aws/service_worker/README.md @@ -9,7 +9,7 @@ Concrete ECS worker service wrapper. 
## Dependencies - `task_worker` remote state -- `cluster`, `network`, `security`, `api`, and `lambda_worker` remote state +- `cluster`, `network`, and `security` remote state ## Key outputs @@ -21,4 +21,5 @@ Concrete ECS worker service wrapper. This module wires the worker-specific service onto the shared ECS service behavior. -It also uses the shared worker SQS queue name from `lambda_worker` for ECS service autoscaling. +It uses the ECS worker queue name exported by `task_worker` for service autoscaling. +During bootstrap applies, it uses placeholder values instead of reading task outputs directly so the bootstrap path does not need a pre-existing task state file. diff --git a/infra/modules/aws/service_worker/data.tf b/infra/modules/aws/service_worker/data.tf index f20b5fd..45e6c98 100644 --- a/infra/modules/aws/service_worker/data.tf +++ b/infra/modules/aws/service_worker/data.tf @@ -9,16 +9,6 @@ data "terraform_remote_state" "task_worker" { } } -data "terraform_remote_state" "lambda_worker" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/lambda_worker/terraform.tfstate" - region = var.aws_region - } -} - data "terraform_remote_state" "network" { backend = "s3" diff --git a/infra/modules/aws/service_worker/locals.tf b/infra/modules/aws/service_worker/locals.tf new file mode 100644 index 0000000..05e6e03 --- /dev/null +++ b/infra/modules/aws/service_worker/locals.tf @@ -0,0 +1,6 @@ +locals { + task_worker_outputs = var.bootstrap ? null : one(data.terraform_remote_state.task_worker[*].outputs) + + task_definition_arn = var.bootstrap ? "" : local.task_worker_outputs.task_definition_arn + autoscaling_queue_name = var.bootstrap ? 
"not_set" : local.task_worker_outputs.sqs_queue_name +} diff --git a/infra/modules/aws/service_worker/main.tf b/infra/modules/aws/service_worker/main.tf index e79647e..626599d 100644 --- a/infra/modules/aws/service_worker/main.tf +++ b/infra/modules/aws/service_worker/main.tf @@ -2,7 +2,7 @@ module "service_worker" { source = "../_shared/service" service_name = var.service_name - task_definition_arn = var.bootstrap ? "" : data.terraform_remote_state.task_worker[0].outputs.task_definition_arn + task_definition_arn = local.task_definition_arn container_port = var.container_port root_path = var.root_path connection_type = var.connection_type @@ -41,7 +41,7 @@ module "service_worker" { scale_in_adjustment = 1 # Remove 1 task cooldown_out = 60 # 1min cooldown (more stable) cooldown_in = 300 # 5min cooldown (prevent flapping) - queue_name = data.terraform_remote_state.lambda_worker.outputs.sqs_queue_name + queue_name = local.autoscaling_queue_name } } } diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index 254a2e8..d9a8a5b 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -5,19 +5,22 @@ Concrete ECS worker task wrapper. ## Owns - worker ECS task definition via `_shared/task` +- ECS worker queue via `_shared/sqs` ## Key behavior - runs `python -u app.py` - publishes worker task revisions for ECS deploys - uses the shared ECR repository named by `ecr_repository_name` -- reads from the shared worker SQS queue via `AWS_SQS_QUEUE_URL` +- injects its own queue URL into the container via `AWS_SQS_QUEUE_URL` - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs - `task_definition_arn` - `service_name` +- `sqs_queue_name` +- `sqs_queue_url` - log group name -This module is the image-driven deployment unit for the ECS worker. 
+This module is the image-driven deployment unit for the ECS worker and owns the ECS worker queue directly so queue creation follows the task stack lifecycle. diff --git a/infra/modules/aws/task_worker/data.tf b/infra/modules/aws/task_worker/data.tf index a4df307..e69de29 100644 --- a/infra/modules/aws/task_worker/data.tf +++ b/infra/modules/aws/task_worker/data.tf @@ -1,9 +0,0 @@ -data "terraform_remote_state" "lambda_worker" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/lambda_worker/terraform.tfstate" - region = var.aws_region - } -} diff --git a/infra/modules/aws/task_worker/local.tf b/infra/modules/aws/task_worker/local.tf new file mode 100644 index 0000000..88347cf --- /dev/null +++ b/infra/modules/aws/task_worker/local.tf @@ -0,0 +1,4 @@ +locals { + sqs_queue_name = "${var.project_name}-${var.environment}-ecs-worker-queue" + sqs_dlq_name = "${var.project_name}-${var.environment}-ecs-worker-dlq" +} diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 6ebf08f..45fc1ce 100644 --- a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -1,3 +1,10 @@ +module "sqs_queue" { + source = "../_shared/sqs" + + sqs_queue_name = local.sqs_queue_name + sqs_dlq_name = local.sqs_dlq_name +} + module "task_worker" { source = "../_shared/task" @@ -19,11 +26,11 @@ module "task_worker" { additional_env_vars = [ { name = "AWS_SQS_QUEUE_URL" - value = data.terraform_remote_state.lambda_worker.outputs.sqs_queue_url + value = module.sqs_queue.sqs_queue_url } ] additional_runtime_policy_arns = [ - data.terraform_remote_state.lambda_worker.outputs.sqs_queue_read_policy_arn + module.sqs_queue.sqs_queue_read_policy_arn ] root_path = "" diff --git a/infra/modules/aws/task_worker/outputs.tf b/infra/modules/aws/task_worker/outputs.tf index 952dcd3..20ee03f 100644 --- a/infra/modules/aws/task_worker/outputs.tf +++ b/infra/modules/aws/task_worker/outputs.tf @@ -13,3 
+13,15 @@ output "root_path" { output "service_name" { value = module.task_worker.service_name } + +output "sqs_queue_name" { + value = module.sqs_queue.sqs_queue_name +} + +output "sqs_queue_url" { + value = module.sqs_queue.sqs_queue_url +} + +output "sqs_queue_read_policy_arn" { + value = module.sqs_queue.sqs_queue_read_policy_arn +} From 80e76ef9744cb5356c3bda0966b81f18eee503f7 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 16:20:52 +0100 Subject: [PATCH 09/16] chore: rename jobs --- .github/workflows/deploy_dev_code.yml | 10 +++++----- .github/workflows/deploy_dev_infra.yml | 8 ++++---- .github/workflows/deploy_prod_code.yml | 6 +++--- .github/workflows/deploy_prod_infra.yml | 6 +++--- .github/workflows/destroy.yml | 22 +++++++++++----------- .github/workflows/pull_request.yml | 24 ++++++++++++------------ .github/workflows/release.yml | 16 ++++++++-------- 7 files changed, 46 insertions(+), 46 deletions(-) diff --git a/.github/workflows/deploy_dev_code.yml b/.github/workflows/deploy_dev_code.yml index 294e41d..5d6e6a0 100644 --- a/.github/workflows/deploy_dev_code.yml +++ b/.github/workflows/deploy_dev_code.yml @@ -1,4 +1,4 @@ -name: Dev Deploy Code +name: Dev Code # This deploys code changes to dev without touching infra. 
on: @@ -10,11 +10,11 @@ permissions: jobs: setup: - name: Discover Directories + name: Discover uses: ./.github/workflows/get_directories.yml build: - name: Build Artifacts + name: Build uses: ./.github/workflows/build.yml needs: - setup @@ -28,7 +28,7 @@ jobs: get_build: - name: Resolve Build Outputs + name: Resolve needs: build uses: ./.github/workflows/build_get.yml with: @@ -38,7 +38,7 @@ jobs: ecs_version: ${{ needs.build.outputs.ecs_version }} deploy: - name: Deploy Code + name: Deploy uses: ./.github/workflows/deploy.yml needs: - setup diff --git a/.github/workflows/deploy_dev_infra.yml b/.github/workflows/deploy_dev_infra.yml index a553fec..c94033b 100644 --- a/.github/workflows/deploy_dev_infra.yml +++ b/.github/workflows/deploy_dev_infra.yml @@ -1,4 +1,4 @@ -name: Dev Deploy Infra +name: Dev Infra on: workflow_dispatch: @@ -9,18 +9,18 @@ permissions: jobs: setup: - name: Discover Directories + name: Discover uses: ./.github/workflows/get_directories.yml code: - name: Prepare Infra Artifacts + name: Artifacts uses: ./.github/workflows/infra_releases.yml with: environment: dev infra_version: ${{ github.sha }} infra: - name: Apply Infrastructure + name: Apply needs: - setup - code diff --git a/.github/workflows/deploy_prod_code.yml b/.github/workflows/deploy_prod_code.yml index 49bb5c7..172c137 100644 --- a/.github/workflows/deploy_prod_code.yml +++ b/.github/workflows/deploy_prod_code.yml @@ -1,4 +1,4 @@ -name: Prod Deploy Code +name: Prod Code # This deploys code changes to prod from release artifacts already present in the shared CI bucket and ECR repository. 
on: @@ -20,7 +20,7 @@ permissions: jobs: get_build: - name: Resolve Release Artifacts + name: Resolve uses: ./.github/workflows/build_get.yml with: environment: ci @@ -29,7 +29,7 @@ jobs: ecs_version: ${{ inputs.ecs_version }} deploy: - name: Deploy Code + name: Deploy uses: ./.github/workflows/deploy.yml needs: - get_build diff --git a/.github/workflows/deploy_prod_infra.yml b/.github/workflows/deploy_prod_infra.yml index b0ade0e..b55fe9c 100644 --- a/.github/workflows/deploy_prod_infra.yml +++ b/.github/workflows/deploy_prod_infra.yml @@ -1,4 +1,4 @@ -name: Prod Deploy Infra +name: Prod Infra on: workflow_dispatch: @@ -9,7 +9,7 @@ permissions: jobs: get_build: - name: Resolve Release Artifacts + name: Resolve uses: ./.github/workflows/build_get.yml with: environment: ci @@ -18,7 +18,7 @@ jobs: ecs_version: 0.9.4 infra: - name: Apply Infrastructure + name: Apply needs: - get_build uses: ./.github/workflows/infra.yml diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml index a2435f9..d0f372b 100644 --- a/.github/workflows/destroy.yml +++ b/.github/workflows/destroy.yml @@ -1,4 +1,4 @@ -name: Kill Environment +name: Destroy on: workflow_dispatch: @@ -25,11 +25,11 @@ env: jobs: setup: - name: Discover Directories + name: Discover uses: ./.github/workflows/get_directories.yml lambdas: - name: Destroy Lambda Infra + name: Lambdas runs-on: ubuntu-latest needs: setup strategy: @@ -47,7 +47,7 @@ jobs: tg_action: destroy frontend: - name: Destroy Frontend Infra + name: Frontend runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -62,7 +62,7 @@ jobs: tg_action: destroy services: - name: Destroy Service Infra + name: Services runs-on: ubuntu-latest needs: setup strategy: @@ -83,7 +83,7 @@ jobs: tg_action: destroy tasks: - name: Destroy Task Infra + name: Tasks runs-on: ubuntu-latest needs: - setup @@ -107,7 +107,7 @@ jobs: tg_action: destroy network: - name: Destroy Network Infra + name: Network needs: - frontend - services @@ -124,7 +124,7 @@ 
jobs: tg_action: destroy security: - name: Destroy Security Infra + name: Security needs: - network runs-on: ubuntu-latest @@ -139,7 +139,7 @@ jobs: tg_action: destroy build-bucket: - name: Destroy Code Bucket + name: Code Bucket if: inputs.environment != 'prod' needs: - lambdas @@ -155,7 +155,7 @@ jobs: tg_action: destroy ecr: - name: Destroy ECR + name: ECR if: inputs.environment != 'prod' needs: - network @@ -171,7 +171,7 @@ jobs: tg_action: destroy cluster: - name: Destroy Cluster Infra + name: Cluster needs: - network runs-on: ubuntu-latest diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0fc5744..bf2d9f2 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1,4 +1,4 @@ -name: Pull Request +name: PR on: pull_request: @@ -14,7 +14,7 @@ on: jobs: check-pr-title: - name: Validate PR Title + name: PR Title runs-on: ubuntu-latest env: PR_TITLE: ${{ github.event.pull_request.title }} @@ -33,7 +33,7 @@ jobs: exit 1 check: - name: Detect Changes + name: Changes needs: check-pr-title permissions: pull-requests: read @@ -47,7 +47,7 @@ jobs: needs: check runs-on: ubuntu-latest if: ${{ needs.check.outputs.github == 'true' }} - name: Run github formatting checks + name: GH Fmt timeout-minutes: 2 steps: - uses: actions/checkout@v6 @@ -60,7 +60,7 @@ jobs: needs: check runs-on: ubuntu-latest if: ${{ needs.check.outputs.terragrunt == 'true' }} - name: Run terragrunt formatting checks + name: TG Fmt timeout-minutes: 2 steps: - uses: actions/checkout@v6 @@ -81,7 +81,7 @@ jobs: needs: check runs-on: ubuntu-latest if: ${{ needs.check.outputs.terraform == 'true' }} - name: Run terraform lint checks + name: TF Lint timeout-minutes: 2 steps: - uses: actions/checkout@v6 @@ -98,7 +98,7 @@ jobs: check-lambda-naming: needs: check runs-on: ubuntu-latest - name: Check lambda directory naming uses underscores + name: Lambda Names steps: - uses: actions/checkout@v6 - name: Fail if any lambda directory uses 
hyphens @@ -113,7 +113,7 @@ jobs: check-ecs-module-pairs: needs: check runs-on: ubuntu-latest - name: Check ECS task/service module pairs + name: ECS Pairs steps: - uses: actions/checkout@v6 @@ -160,13 +160,13 @@ jobs: echo "✅ All ECS task_/service_ pairs are present." setup: - name: Discover App Directories + name: Discover if: ${{ needs.check.outputs.lambdas == 'true' || needs.check.outputs.containers == 'true' }} needs: check uses: ./.github/workflows/get_directories.yml build-lambdas: - name: Build Lambda Changes + name: Build Lambdas if: ${{ needs.check.outputs.lambdas == 'true' }} needs: - check @@ -187,7 +187,7 @@ jobs: just_action: lambda-build build-containers: - name: Build Container Changes + name: Build Containers if: ${{ needs.check.outputs.containers == 'true' }} needs: - check @@ -213,7 +213,7 @@ jobs: needs: - check runs-on: ubuntu-latest - name: Build frontend + name: Build Frontend timeout-minutes: 5 steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5c6c045..54f87d0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ permissions: jobs: get-current-tag: - name: Get Current Tag + name: Current Tag runs-on: ubuntu-latest outputs: current_tag: ${{ steps.get_tag.outputs.CURRENT_TAG }} @@ -30,7 +30,7 @@ jobs: echo "CURRENT_TAG=${CURRENT_TAG}" >> $GITHUB_OUTPUT get-next-tag: - name: Calculate Next Tag + name: Next Tag runs-on: ubuntu-latest outputs: tag: ${{ steps.get_next_version.outputs.version }} @@ -51,7 +51,7 @@ jobs: echo ${{ steps.get_next_version.outputs.hasNextVersion }} create-tag: - name: Create Tag + name: Tag needs: get-next-tag if: ${{ needs.get-next-tag.outputs.has-next-version == 'true' }} runs-on: ubuntu-latest @@ -66,7 +66,7 @@ jobs: git push origin --tag "$TAG" get-commits: - name: Collect Release Commits + name: Commits needs: - get-next-tag - create-tag @@ -91,12 +91,12 @@ jobs: echo "EOF" >> $GITHUB_OUTPUT get-apps: - name: 
Discover App Directories + name: Discover uses: ./.github/workflows/get_directories.yml build: - name: Build Release Artifacts + name: Build needs: - create-tag - get-next-tag @@ -115,7 +115,7 @@ jobs: ecs_matrix: ${{ needs.get-apps.outputs.container_dirs }} code: - name: Prepare Shared Infra Artifacts + name: Artifacts needs: - create-tag - get-next-tag @@ -128,7 +128,7 @@ jobs: infra_version: ${{ needs.get-next-tag.outputs.tag }} release: - name: Publish GitHub Release + name: Publish runs-on: ubuntu-latest needs: - get-next-tag From 0437f1531be7a1572ebc9f403cde996617220e7e Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 16:24:54 +0100 Subject: [PATCH 10/16] fix: healthcheck for bootstrap --- infra/modules/aws/_shared/service/README.md | 6 ++++++ infra/modules/aws/_shared/service/locals.tf | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/infra/modules/aws/_shared/service/README.md b/infra/modules/aws/_shared/service/README.md index 4d39493..7233530 100644 --- a/infra/modules/aws/_shared/service/README.md +++ b/infra/modules/aws/_shared/service/README.md @@ -20,6 +20,12 @@ Shared ECS service module. - `bootstrap_image_uri` - `codedeploy_alarm_names` +## Bootstrap behavior + +Bootstrap ECS services use the shared bootstrap image, which is a generic placeholder image rather than the real app container. + +Because of that, bootstrap target groups health-check `/` instead of the app-specific health endpoint. Once the real task definition is deployed, the normal service health path applies again, such as `/health` or `/<root_path>/health`.
+ ## Deployment strategies - `all_at_once` diff --git a/infra/modules/aws/_shared/service/locals.tf b/infra/modules/aws/_shared/service/locals.tf index 7214eb3..e349685 100644 --- a/infra/modules/aws/_shared/service/locals.tf +++ b/infra/modules/aws/_shared/service/locals.tf @@ -18,7 +18,7 @@ locals { green_target_group_name = "tg-${substr(md5("${var.service_name}-green"), 0, 8)}-green" is_default_path = var.root_path == "" - health_check_path = local.is_default_path ? "/health" : "/${var.root_path}/health" + health_check_path = var.bootstrap ? "/" : (local.is_default_path ? "/health" : "/${var.root_path}/health") exact_route_key = local.is_default_path ? "ANY /" : "ANY /${var.root_path}" proxy_route_key = local.is_default_path ? "ANY /{proxy+}" : "ANY /${var.root_path}/{proxy+}" target_group_arn = local.is_default_path ? var.default_target_group_arn : aws_lb_target_group.service_target_group[0].arn From b1c93a2868911f2aff2b92d1039d6ae2f413e59b Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 13 Apr 2026 16:29:38 +0100 Subject: [PATCH 11/16] chore: worker healthcheck --- README.md | 1 + infra/README.md | 2 +- infra/modules/aws/_shared/task/README.md | 3 +++ infra/modules/aws/_shared/task/locals.tf | 9 +++++++++ infra/modules/aws/_shared/task/variables.tf | 12 ++++++++++++ infra/modules/aws/task_worker/README.md | 1 + infra/modules/aws/task_worker/main.tf | 8 ++++++++ 7 files changed, 35 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a78d26f..0822d9c 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ For `*_code` release deploys, pass explicit release versions for each runtime yo The ECS worker queue is now owned by `task_worker`, and `service_worker` reads that queue name from `task_worker` remote state. That keeps the ECS worker queue aligned with the worker stack lifecycle without depending on the Lambda worker queue. 
For bootstrap service applies, `service_worker` now uses placeholder task and queue values locally rather than spreading `count`-indexed remote-state access through the module. +The ECS worker task uses an ECS container health check that verifies it can still read its configured SQS queue, so worker health is no longer inferred only from process startup. ## 🧪 example prompts diff --git a/infra/README.md b/infra/README.md index bbda4c8..49f7981 100644 --- a/infra/README.md +++ b/infra/README.md @@ -66,7 +66,7 @@ stores state at: Current examples include: - `task_worker` / `service_worker` - Internal ECS worker service shape. + Internal ECS worker service shape, with the worker queue owned by `task_worker` and a container health check that verifies SQS queue access. - `task_api` / `service_api` ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. diff --git a/infra/modules/aws/_shared/task/README.md b/infra/modules/aws/_shared/task/README.md index 72a157b..58bdd31 100644 --- a/infra/modules/aws/_shared/task/README.md +++ b/infra/modules/aws/_shared/task/README.md @@ -19,6 +19,7 @@ Shared ECS task-definition module. - `local_tunnel` - `xray_enabled` - `command` +- optional `health_check` In the concrete ECS task wrappers in this repo, `local_tunnel` and `xray_enabled` default to `false` unless the environment explicitly opts in. @@ -31,3 +32,5 @@ In the concrete ECS task wrappers in this repo, `local_tunnel` and `xray_enabled Use this for task revision creation. Traffic rollout happens at the service layer. The ECR repository access policy uses the explicit `ecr_repository_name` input. In this repo, Terragrunt sets a root-level default and environments can override it if the repository naming ever changes. + +When `health_check` is set, the module adds an ECS container health check to the main service container. 
diff --git a/infra/modules/aws/_shared/task/locals.tf b/infra/modules/aws/_shared/task/locals.tf index 1d8ecf7..f66ee27 100644 --- a/infra/modules/aws/_shared/task/locals.tf +++ b/infra/modules/aws/_shared/task/locals.tf @@ -84,6 +84,15 @@ locals { essential = true environment = concat(local.shared_environment, var.additional_env_vars) }, + var.health_check == null ? {} : { + healthCheck = { + command = var.health_check.command + interval = var.health_check.interval + timeout = var.health_check.timeout + retries = var.health_check.retries + startPeriod = var.health_check.start_period + } + }, var.command == null ? {} : { command = var.command } diff --git a/infra/modules/aws/_shared/task/variables.tf b/infra/modules/aws/_shared/task/variables.tf index 0425a24..8149b28 100644 --- a/infra/modules/aws/_shared/task/variables.tf +++ b/infra/modules/aws/_shared/task/variables.tf @@ -78,3 +78,15 @@ variable "additional_runtime_policy_arns" { type = list(string) default = [] } + +variable "health_check" { + description = "Optional ECS container health check configuration." + type = object({ + command = list(string) + interval = optional(number, 30) + timeout = optional(number, 5) + retries = optional(number, 3) + start_period = optional(number, 0) + }) + default = null +} diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index d9a8a5b..64aace1 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -13,6 +13,7 @@ Concrete ECS worker task wrapper. 
- publishes worker task revisions for ECS deploys - uses the shared ECR repository named by `ecr_repository_name` - injects its own queue URL into the container via `AWS_SQS_QUEUE_URL` +- uses an ECS container health check that verifies SQS access against its configured queue - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 45fc1ce..4378c4c 100644 --- a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -33,6 +33,14 @@ module "task_worker" { module.sqs_queue.sqs_queue_read_policy_arn ] + health_check = { + command = ["CMD-SHELL", "python -c \"import boto3, os; boto3.client('sqs', region_name=os.environ['AWS_REGION']).get_queue_attributes(QueueUrl=os.environ['AWS_SQS_QUEUE_URL'], AttributeNames=['QueueArn'])\""] + interval = 60 + timeout = 5 + retries = 3 + start_period = 10 + } + root_path = "" service_name = "ecs-worker" command = ["python", "-u", "app.py"] From fc8a2bde26464a95bd484683c3972e25fd7dabc6 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Tue, 14 Apr 2026 09:52:29 +0100 Subject: [PATCH 12/16] chore: heartbeat file healthcheck for worker --- README.md | 2 +- containers/worker/app.py | 8 ++++++++ infra/README.md | 2 +- infra/modules/aws/task_worker/README.md | 2 +- infra/modules/aws/task_worker/main.tf | 8 ++++++-- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0822d9c..7319898 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ For `*_code` release deploys, pass explicit release versions for each runtime yo The ECS worker queue is now owned by `task_worker`, and `service_worker` reads that queue name from `task_worker` remote state. That keeps the ECS worker queue aligned with the worker stack lifecycle without depending on the Lambda worker queue. 
For bootstrap service applies, `service_worker` now uses placeholder task and queue values locally rather than spreading `count`-indexed remote-state access through the module. -The ECS worker task uses an ECS container health check that verifies it can still read its configured SQS queue, so worker health is no longer inferred only from process startup. +The ECS worker task uses a local heartbeat-file health check, which is a better fit for a non-HTTP worker than probing a service endpoint or tying task health directly to transient AWS API calls. ## 🧪 example prompts diff --git a/containers/worker/app.py b/containers/worker/app.py index ebe2611..9ee3018 100644 --- a/containers/worker/app.py +++ b/containers/worker/app.py @@ -5,10 +5,16 @@ QUEUE_URL = os.environ['AWS_SQS_QUEUE_URL'] AWS_REGION = os.environ['AWS_REGION'] POLL_TIMEOUT = int(os.getenv("POLL_TIMEOUT", "60")) +HEARTBEAT_FILE = os.getenv("HEARTBEAT_FILE", "/tmp/worker-heartbeat") sqs = boto3.client('sqs', region_name=AWS_REGION) +def write_heartbeat(): + with open(HEARTBEAT_FILE, "w", encoding="utf-8") as heartbeat: + heartbeat.write(str(int(time.time()))) + + def process_message(msg): # TODO: implement business logic print({"message_id": msg['MessageId'], "body": msg['Body'][:200]}) @@ -35,6 +41,8 @@ def poll(): if __name__ == "__main__": print(f"Starting SQS poller for {QUEUE_URL}") + write_heartbeat() while True: poll() + write_heartbeat() time.sleep(POLL_TIMEOUT) diff --git a/infra/README.md b/infra/README.md index 49f7981..5b285e9 100644 --- a/infra/README.md +++ b/infra/README.md @@ -66,7 +66,7 @@ stores state at: Current examples include: - `task_worker` / `service_worker` - Internal ECS worker service shape, with the worker queue owned by `task_worker` and a container health check that verifies SQS queue access. + Internal ECS worker service shape, with the worker queue owned by `task_worker` and a container health check based on a local worker heartbeat file. 
- `task_api` / `service_api` ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index 64aace1..7d50b83 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -13,7 +13,7 @@ Concrete ECS worker task wrapper. - publishes worker task revisions for ECS deploys - uses the shared ECR repository named by `ecr_repository_name` - injects its own queue URL into the container via `AWS_SQS_QUEUE_URL` -- uses an ECS container health check that verifies SQS access against its configured queue +- updates a local heartbeat file as it polls and uses an ECS container health check against that heartbeat - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 4378c4c..095feab 100644 --- a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -27,6 +27,10 @@ module "task_worker" { { name = "AWS_SQS_QUEUE_URL" value = module.sqs_queue.sqs_queue_url + }, + { + name = "HEARTBEAT_FILE" + value = "/tmp/worker-heartbeat" } ] additional_runtime_policy_arns = [ @@ -34,11 +38,11 @@ module "task_worker" { ] health_check = { - command = ["CMD-SHELL", "python -c \"import boto3, os; boto3.client('sqs', region_name=os.environ['AWS_REGION']).get_queue_attributes(QueueUrl=os.environ['AWS_SQS_QUEUE_URL'], AttributeNames=['QueueArn'])\""] + command = ["CMD-SHELL", "python -c \"import os, time; path=os.environ['HEARTBEAT_FILE']; now=time.time(); mtime=os.path.getmtime(path); raise SystemExit(0 if now - mtime < 180 else 1)\""] interval = 60 timeout = 5 retries = 3 - start_period = 10 + start_period = 30 } root_path = "" From 
593c17f1707f1e3b27cbb59935183f9cf3be5c60 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Tue, 14 Apr 2026 10:04:51 +0100 Subject: [PATCH 13/16] chore: ignore load_balancer ecs service --- README.md | 10 ++++------ infra/modules/aws/_shared/service/README.md | 18 ++++++++++-------- infra/modules/aws/_shared/service/main.tf | 7 +++++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7319898..804a9d3 100644 --- a/README.md +++ b/README.md @@ -221,14 +221,12 @@ deployment_strategy = "blue_green" - ECS CodeDeploy is only created for load-balanced ECS services in `_shared/service` - internal ECS services without load balancer integration should use native ECS rolling updates instead -- the shared ECS service resource ignores `task_definition` drift so later infra applies do not revert the live task revision after either a rolling deploy or a CodeDeploy rollout +- infra ignores ECS `task_definition` drift +- for CodeDeploy ECS services, infra also ignores `load_balancer` drift - the deployment workflow: - applies the new `task_*` revision - - if the service has CodeDeploy resources, reads `codedeploy_app_name` and `codedeploy_deployment_group_name` from `service_*` - - renders [`appspec-ecs.yml`](appspec-ecs.yml) - - uploads the AppSpec to the code bucket - - runs `just ecs-deploy` - - otherwise updates the ECS service to the new task definition with a native rolling deploy + - uses CodeDeploy for load-balanced services + - uses native rolling deploys for internal services ## 🔥↩️ deployment roll-back diff --git a/infra/modules/aws/_shared/service/README.md b/infra/modules/aws/_shared/service/README.md index 7233530..ce43626 100644 --- a/infra/modules/aws/_shared/service/README.md +++ b/infra/modules/aws/_shared/service/README.md @@ -22,9 +22,9 @@ Shared ECS service module. ## Bootstrap behavior -Bootstrap ECS services use the shared bootstrap image, which is a generic placeholder image rather than the real app container. 
- -Because of that, bootstrap target groups health-check `/` instead of the app-specific health endpoint. Once the real task definition is deployed, the normal service health path applies again, such as `/health` or `/<root_path>/health`. +Bootstrap ECS services use the shared placeholder image. +Bootstrap health checks use `/`. +Real task deploys use the normal app health path, such as `/health` or `/<root_path>/health`. ## Deployment strategies @@ -38,11 +38,13 @@ For internal non-load-balanced services, the deploy workflow falls back to nativ ## Drift ownership -The ECS service ignores changes to `task_definition`. +The ECS service ignores: -That is intentional: +- `task_definition` +- `load_balancer` -- deploy workflows own the live task revision -- infra applies own the stable service shape +Reason: -Without that split, a later infra apply would revert a successful rolling or CodeDeploy deployment back to the older task definition stored in Terraform state. +- deploy workflows own the live revision +- infra owns the stable service shape +- CodeDeploy ECS services reject `load_balancer` updates via `UpdateService` diff --git a/infra/modules/aws/_shared/service/main.tf b/infra/modules/aws/_shared/service/main.tf index 5c99327..fd871b9 100644 --- a/infra/modules/aws/_shared/service/main.tf +++ b/infra/modules/aws/_shared/service/main.tf @@ -176,9 +176,12 @@ resource "aws_ecs_service" "service" { } lifecycle { - # Deploy workflows own the live task revision. Terraform keeps the service - # shape stable without reverting the currently deployed revision. + # Deploy workflows own the live task revision. Terraform keeps the service stable without reverting the currently deployed revision. + + # For CODE_DEPLOY services, ECS also rejects load balancer updates through UpdateService. Terraform still owns the target group and listener-rule + # resources themselves, but the ECS service attachment must stay stable after first creation.
ignore_changes = [ + load_balancer, task_definition, ] } From b99d06c906a6eaaeaca35b71c6b3034915532226 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Tue, 14 Apr 2026 10:23:59 +0100 Subject: [PATCH 14/16] chore: cloudfront invalidate MAX_ATTEMPTS=30 --- justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/justfile b/justfile index 806493e..8033edd 100644 --- a/justfile +++ b/justfile @@ -870,7 +870,7 @@ frontend-invalidate: exit 1 fi - MAX_ATTEMPTS=18 + MAX_ATTEMPTS=30 SLEEP_INTERVAL=10 echo "🔄 Creating CloudFront invalidation..." From 7ae1a4287a368f9a4fdc97dd4f0e354afd460f7e Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Tue, 14 Apr 2026 10:28:06 +0100 Subject: [PATCH 15/16] fix: listener arn --- infra/modules/aws/_shared/service/README.md | 3 +++ infra/modules/aws/_shared/service/locals.tf | 1 + infra/modules/aws/_shared/service/main.tf | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/infra/modules/aws/_shared/service/README.md b/infra/modules/aws/_shared/service/README.md index ce43626..55b77e5 100644 --- a/infra/modules/aws/_shared/service/README.md +++ b/infra/modules/aws/_shared/service/README.md @@ -20,6 +20,9 @@ Shared ECS service module. - `bootstrap_image_uri` - `codedeploy_alarm_names` +Subpath services match both `/<root_path>` and `/<root_path>/*`. +For CodeDeploy-backed subpath services, the traffic route uses the ALB listener-rule ARN. + ## Bootstrap behavior Bootstrap ECS services use the shared placeholder image. diff --git a/infra/modules/aws/_shared/service/locals.tf b/infra/modules/aws/_shared/service/locals.tf index e349685..294c206 100644 --- a/infra/modules/aws/_shared/service/locals.tf +++ b/infra/modules/aws/_shared/service/locals.tf @@ -23,6 +23,7 @@ locals { proxy_route_key = local.is_default_path ? "ANY /{proxy+}" : "ANY /${var.root_path}/{proxy+}" target_group_arn = local.is_default_path ?
var.default_target_group_arn : aws_lb_target_group.service_target_group[0].arn blue_target_group_name = local.is_default_path ? element(split("/", var.default_target_group_arn), 1) : aws_lb_target_group.service_target_group[0].name + traffic_route_arn = local.is_default_path ? var.default_http_listener_arn : aws_lb_listener_rule.service[0].arn load_balancers = var.connection_type == "internal_dns" || var.connection_type == "vpc_link" ? [{ target_group_arn = local.target_group_arn diff --git a/infra/modules/aws/_shared/service/main.tf b/infra/modules/aws/_shared/service/main.tf index fd871b9..b595a84 100644 --- a/infra/modules/aws/_shared/service/main.tf +++ b/infra/modules/aws/_shared/service/main.tf @@ -253,7 +253,7 @@ resource "aws_codedeploy_deployment_group" "ecs" { load_balancer_info { target_group_pair_info { prod_traffic_route { - listener_arns = [var.default_http_listener_arn] + listener_arns = [local.traffic_route_arn] } target_group { From 9898da1aebb4e237c6201af1a3f91490704d01b7 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Tue, 14 Apr 2026 11:03:11 +0100 Subject: [PATCH 16/16] chore: mv listener ownership --- AGENTS.md | 1 + README.md | 2 ++ frontend/src/App.jsx | 17 +++++++++--- infra/README.md | 2 +- infra/modules/aws/_shared/service/README.md | 3 ++- infra/modules/aws/_shared/service/locals.tf | 5 ++-- infra/modules/aws/_shared/service/main.tf | 26 ++++++++++++++++--- .../modules/aws/_shared/service/variables.tf | 10 +++++++ infra/modules/aws/network/README.md | 1 + infra/modules/aws/network/outputs.tf | 4 +++ infra/modules/aws/security/README.md | 2 ++ infra/modules/aws/security/main.tf | 7 +++++ infra/modules/aws/security/variables.tf | 5 ++++ infra/modules/aws/service_api/README.md | 1 + infra/modules/aws/service_api/main.tf | 2 ++ 15 files changed, 77 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 90635ca..094c033 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,6 +44,7 @@ Choose deployment modes that match the runtime 
shape. - ECS CodeDeploy requires a load-balanced service shape in this repo. - In practice that means `connection_type` must be `internal_dns` or `vpc_link` for CodeDeploy-backed ECS deploys. +- In this repo, subpath ECS services need a dedicated ALB listener if they are meant to use CodeDeploy blue/green. - If `connection_type = "internal"`, prefer `rolling`. ## Feasibility Check diff --git a/README.md b/README.md index 804a9d3..040d342 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ This repo now includes a sample ECS API container service exposed separately fro - API Gateway Lambda route namespace: `/*` - API Gateway ECS route namespace: `/ecs/*` - deployment model: ECS CodeDeploy `blue_green` +- ALB shape: shared private ALB with a dedicated ECS API listener on port `8080` - stacks: `task_api` and `service_api` - the sample frontend calls both backends and renders both responses so the path split is visible in the UI @@ -220,6 +221,7 @@ deployment_strategy = "blue_green" ``` - ECS CodeDeploy is only created for load-balanced ECS services in `_shared/service` +- subpath ECS services need a dedicated ALB listener if they are meant to use CodeDeploy blue/green in this repo - internal ECS services without load balancer integration should use native ECS rolling updates instead - infra ignores ECS `task_definition` drift - for CodeDeploy ECS services, infra also ignores `load_balancer` drift diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index a03c28d..3eba86a 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,5 +1,16 @@ import { useEffect, useState } from 'react' +async function fetchJson(url) { + const response = await fetch(url) + const text = await response.text() + + try { + return JSON.parse(text) + } catch { + throw new Error(`${response.status} ${response.statusText}: ${text.slice(0, 200)}`) + } +} + export default function App() { const [lambdaData, setLambdaData] = useState(null) const [lambdaError, setLambdaError] = 
useState(null) @@ -7,13 +18,11 @@ export default function App() { const [ecsError, setEcsError] = useState(null) useEffect(() => { - fetch('/api/') - .then((r) => r.json()) + fetchJson('/api/') .then(setLambdaData) .catch(setLambdaError) - fetch('/api/ecs/') - .then((r) => r.json()) + fetchJson('/api/ecs') .then(setEcsData) .catch(setEcsError) }, []) diff --git a/infra/README.md b/infra/README.md index 5b285e9..bb15b04 100644 --- a/infra/README.md +++ b/infra/README.md @@ -68,7 +68,7 @@ Current examples include: - `task_worker` / `service_worker` Internal ECS worker service shape, with the worker queue owned by `task_worker` and a container health check based on a local worker heartbeat file. - `task_api` / `service_api` - ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. + ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`, backed by a dedicated listener on the shared ALB. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. ## Dependency Notes diff --git a/infra/modules/aws/_shared/service/README.md b/infra/modules/aws/_shared/service/README.md index 55b77e5..1142f1b 100644 --- a/infra/modules/aws/_shared/service/README.md +++ b/infra/modules/aws/_shared/service/README.md @@ -19,9 +19,10 @@ Shared ECS service module. - `bootstrap` - `bootstrap_image_uri` - `codedeploy_alarm_names` +- optional `dedicated_listener_port` Subpath services match both `/<root_path>` and `/<root_path>/*`. -For CodeDeploy-backed subpath services, the traffic route uses the ALB listener-rule ARN. +If `dedicated_listener_port` is set, the service gets its own ALB listener and uses that listener for API Gateway integration and ECS CodeDeploy traffic routing.
## Bootstrap behavior diff --git a/infra/modules/aws/_shared/service/locals.tf b/infra/modules/aws/_shared/service/locals.tf index 294c206..a80d124 100644 --- a/infra/modules/aws/_shared/service/locals.tf +++ b/infra/modules/aws/_shared/service/locals.tf @@ -1,5 +1,6 @@ locals { - use_vpc_link = var.connection_type == "vpc_link" + use_vpc_link = var.connection_type == "vpc_link" + use_dedicated_listener = var.dedicated_listener_port != null enable_codedeploy = ( var.connection_type == "internal_dns" || var.connection_type == "vpc_link" ) @@ -23,7 +24,7 @@ locals { proxy_route_key = local.is_default_path ? "ANY /{proxy+}" : "ANY /${var.root_path}/{proxy+}" target_group_arn = local.is_default_path ? var.default_target_group_arn : aws_lb_target_group.service_target_group[0].arn blue_target_group_name = local.is_default_path ? element(split("/", var.default_target_group_arn), 1) : aws_lb_target_group.service_target_group[0].name - traffic_route_arn = local.is_default_path ? var.default_http_listener_arn : aws_lb_listener_rule.service[0].arn + traffic_route_arn = local.use_dedicated_listener ? aws_lb_listener.service[0].arn : (local.is_default_path ? var.default_http_listener_arn : aws_lb_listener_rule.service[0].arn) load_balancers = var.connection_type == "internal_dns" || var.connection_type == "vpc_link" ? [{ target_group_arn = local.target_group_arn diff --git a/infra/modules/aws/_shared/service/main.tf b/infra/modules/aws/_shared/service/main.tf index b595a84..c92656f 100644 --- a/infra/modules/aws/_shared/service/main.tf +++ b/infra/modules/aws/_shared/service/main.tf @@ -81,7 +81,7 @@ resource "aws_lb_target_group" "green_target_group" { } resource "aws_lb_listener_rule" "service" { - count = local.is_default_path ? 0 : 1 + count = (!local.is_default_path && !local.use_dedicated_listener) ? 
1 : 0 listener_arn = var.default_http_listener_arn priority = local.priority @@ -93,7 +93,27 @@ resource "aws_lb_listener_rule" "service" { condition { path_pattern { - values = ["/${var.root_path}/*"] + values = ["/${var.root_path}", "/${var.root_path}/*"] + } + } +} + +resource "aws_lb_listener" "service" { + count = local.use_dedicated_listener ? 1 : 0 + + load_balancer_arn = var.load_balancer_arn + port = var.dedicated_listener_port + protocol = "HTTP" + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.service_target_group[0].arn + } + + lifecycle { + precondition { + condition = var.load_balancer_arn != "" + error_message = "load_balancer_arn must be set when dedicated_listener_port is used." } } } @@ -124,7 +144,7 @@ resource "aws_apigatewayv2_integration" "service" { connection_type = "VPC_LINK" integration_type = "HTTP_PROXY" integration_method = "ANY" - integration_uri = var.default_http_listener_arn + integration_uri = local.traffic_route_arn payload_format_version = "1.0" lifecycle { diff --git a/infra/modules/aws/_shared/service/variables.tf b/infra/modules/aws/_shared/service/variables.tf index c18d81f..9d917d0 100644 --- a/infra/modules/aws/_shared/service/variables.tf +++ b/infra/modules/aws/_shared/service/variables.tf @@ -41,6 +41,11 @@ variable "default_target_group_arn" { type = string } +variable "load_balancer_arn" { + type = string + default = "" +} + variable "default_http_listener_arn" { type = string } @@ -69,6 +74,11 @@ variable "api_invoke_url" { type = string } +variable "dedicated_listener_port" { + type = number + default = null +} + variable "root_path" { description = "The path to serve the service from. 
/ is for default /example_service is for subpath" type = string diff --git a/infra/modules/aws/network/README.md b/infra/modules/aws/network/README.md index cdabc64..37ddf59 100644 --- a/infra/modules/aws/network/README.md +++ b/infra/modules/aws/network/README.md @@ -19,6 +19,7 @@ Shared network and routing module. ## Key outputs +- `load_balancer_arn` - ALB listener and target group identifiers - `internal_invoke_url` - `api_id` diff --git a/infra/modules/aws/network/outputs.tf b/infra/modules/aws/network/outputs.tf index d427a93..94ead26 100644 --- a/infra/modules/aws/network/outputs.tf +++ b/infra/modules/aws/network/outputs.tf @@ -2,6 +2,10 @@ output "default_target_group_arn" { value = aws_lb_target_group.default.arn } +output "load_balancer_arn" { + value = aws_lb.this.arn +} + output "default_http_listener_arn" { value = aws_lb_listener.http.arn } diff --git a/infra/modules/aws/security/README.md b/infra/modules/aws/security/README.md index 4282a4f..4b788fe 100644 --- a/infra/modules/aws/security/README.md +++ b/infra/modules/aws/security/README.md @@ -17,3 +17,5 @@ Shared security-group module. - `api_vpc_link_sg` Used by `network`, `api`, and ECS service modules. + +The load balancer security group also allows the additional internal listener port used by blue/green ECS services with dedicated listeners. 
diff --git a/infra/modules/aws/security/main.tf b/infra/modules/aws/security/main.tf index 856a6e9..594956e 100644 --- a/infra/modules/aws/security/main.tf +++ b/infra/modules/aws/security/main.tf @@ -10,6 +10,13 @@ resource "aws_security_group" "load_balancer" { cidr_blocks = [data.aws_vpc.this.cidr_block] } + ingress { + from_port = var.additional_listener_port + to_port = var.additional_listener_port + protocol = "tcp" + cidr_blocks = [data.aws_vpc.this.cidr_block] + } + egress { from_port = 0 to_port = 0 diff --git a/infra/modules/aws/security/variables.tf b/infra/modules/aws/security/variables.tf index 6f97077..0bea6c1 100644 --- a/infra/modules/aws/security/variables.tf +++ b/infra/modules/aws/security/variables.tf @@ -15,3 +15,8 @@ variable "vpc_name" { variable "container_port" { type = number } + +variable "additional_listener_port" { + type = number + default = 8080 +} diff --git a/infra/modules/aws/service_api/README.md b/infra/modules/aws/service_api/README.md index f3a6c5c..63538a6 100644 --- a/infra/modules/aws/service_api/README.md +++ b/infra/modules/aws/service_api/README.md @@ -16,6 +16,7 @@ Concrete ECS API service wrapper for the sample API service. 
- exposes the ECS API container on the shared HTTP API Gateway using `connection_type = "vpc_link"` - uses `deployment_strategy = "blue_green"` +- uses a dedicated ALB listener on port `8080` so ECS CodeDeploy can own traffic - defaults `local_tunnel` and `xray_enabled` to `false` unless explicitly enabled ## Key outputs diff --git a/infra/modules/aws/service_api/main.tf b/infra/modules/aws/service_api/main.tf index 4dff3f2..d4e1909 100644 --- a/infra/modules/aws/service_api/main.tf +++ b/infra/modules/aws/service_api/main.tf @@ -16,6 +16,7 @@ module "service_api" { ecs_security_group_id = data.terraform_remote_state.security.outputs.ecs_sg default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn + load_balancer_arn = data.terraform_remote_state.network.outputs.load_balancer_arn default_http_listener_arn = data.terraform_remote_state.network.outputs.default_http_listener_arn load_balancer_arn_suffix = data.terraform_remote_state.network.outputs.load_balancer_arn_suffix target_group_arn_suffix = data.terraform_remote_state.network.outputs.target_group_arn_suffix @@ -33,6 +34,7 @@ module "service_api" { desired_task_count = 1 deployment_strategy = "blue_green" + dedicated_listener_port = 8080 codedeploy_alarm_names = [] additional_security_group_ids = []