From 874a9787edd87a733ad2ab3f2516a94d7fc9c5e3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Aug 2025 20:49:56 -0400 Subject: [PATCH 1/4] terraform/aws: Create consistently-named extra volumes AWS EBS volumes do not have consistent names across reboots. Sometimes the root is /dev/nvme0n1; sometimes it can be /dev/nvme2n1. The data device is likewise flaky. So, unlike with libvirt, a device pathname cannot be soldered into a .config and be expected to work reliably. To make sure that EBS volumes are always accessible via a consistent device name, add a tag to each extra volume created by terraform, and use a udev script to link that device under /dev/disk/kdevops. That way, the pathname "/dev/disk/kdevops/extra-volume-00" can always refer to /data on all instances and in our .config files. Assisted-by: Claude AI Signed-off-by: Chuck Lever --- playbooks/extra_volumes.yml | 6 +++ playbooks/roles/extra_volumes/tasks/main.yml | 6 +++ .../extra_volumes/tasks/providers/aws.yml | 48 +++++++++++++++++++ .../extra_volumes/tasks/providers/azure.yml | 1 + .../extra_volumes/tasks/providers/gce.yml | 1 + .../extra_volumes/tasks/providers/oci.yml | 1 + .../templates/99-aws-ebs.rules.j2 | 1 + .../templates/kdevops-disk.conf.j2 | 3 ++ .../templates/udev-ebs-tagger.j2 | 26 ++++++++++ scripts/terraform.Makefile | 3 ++ terraform/aws/kdevops_ebs_volumes/main.tf | 4 ++ terraform/aws/kdevops_ebs_volumes/outputs.tf | 6 +++ terraform/aws/output.tf | 6 +++ 13 files changed, 112 insertions(+) create mode 100644 playbooks/extra_volumes.yml create mode 100644 playbooks/roles/extra_volumes/tasks/main.yml create mode 100644 playbooks/roles/extra_volumes/tasks/providers/aws.yml create mode 100644 playbooks/roles/extra_volumes/tasks/providers/azure.yml create mode 100644 playbooks/roles/extra_volumes/tasks/providers/gce.yml create mode 100644 playbooks/roles/extra_volumes/tasks/providers/oci.yml create mode 100644 playbooks/roles/extra_volumes/templates/99-aws-ebs.rules.j2 create mode 
100644 playbooks/roles/extra_volumes/templates/kdevops-disk.conf.j2 create mode 100644 playbooks/roles/extra_volumes/templates/udev-ebs-tagger.j2 diff --git a/playbooks/extra_volumes.yml b/playbooks/extra_volumes.yml new file mode 100644 index 000000000..c556f7513 --- /dev/null +++ b/playbooks/extra_volumes.yml @@ -0,0 +1,6 @@ +--- +- name: Set up udev rules for /dev/disk/kdevops/ + gather_facts: false + hosts: baseline:dev:service + roles: + - role: extra_volumes diff --git a/playbooks/roles/extra_volumes/tasks/main.yml b/playbooks/roles/extra_volumes/tasks/main.yml new file mode 100644 index 000000000..dab335a8c --- /dev/null +++ b/playbooks/roles/extra_volumes/tasks/main.yml @@ -0,0 +1,6 @@ +--- +- name: Include provider-specific tasks + ansible.builtin.include_tasks: + file: "{{ role_path }}/tasks/providers/{{ kdevops_terraform_provider }}.yml" + when: + - kdevops_terraform_provider == "aws" diff --git a/playbooks/roles/extra_volumes/tasks/providers/aws.yml b/playbooks/roles/extra_volumes/tasks/providers/aws.yml new file mode 100644 index 000000000..b110f68ec --- /dev/null +++ b/playbooks/roles/extra_volumes/tasks/providers/aws.yml @@ -0,0 +1,48 @@ +--- +- name: Install tmpfiles.d configuration for /dev/disk/kdevops/ + become: true + ansible.builtin.template: + src: "kdevops-disk.conf.j2" + dest: "/etc/tmpfiles.d/kdevops-disk.conf" + owner: "root" + group: "root" + mode: "u=rw,g=r,o=r" + +- name: Create /dev/disk/kdevops/ directory using tmpfiles + become: true + ansible.builtin.command: "systemd-tmpfiles --create /etc/tmpfiles.d/kdevops-disk.conf" + changed_when: true + +- name: Extract the "extra volumes" map + delegate_to: localhost + run_once: true + cloud.terraform.terraform_output: + format: json + name: "extra_volumes_map" + project_path: "{{ topdir_path }}/terraform/aws" + register: terraform_output + +- name: Install the script that creates symlinks in /dev/disk/kdevops + become: true + vars: + volume_mapping: "{{ 
terraform_output.value[inventory_hostname] }}" + ansible.builtin.template: + src: "udev-ebs-tagger.j2" + dest: "/usr/local/bin/udev-ebs-tagger" + owner: "root" + group: "root" + mode: "u=rwx,g=rx,o=rx" + +- name: Create the "extra volumes" udev rule + become: true + ansible.builtin.template: + src: "99-aws-ebs.rules.j2" + dest: "/etc/udev/rules.d/99-aws-ebs.rules" + owner: "root" + group: "root" + mode: "u=rw,g=r,o=r" + +- name: Force the target node to reload its udev ruleset and trigger block devices + become: true + ansible.builtin.shell: "udevadm control --reload && udevadm trigger --subsystem-match=block --action=add" + changed_when: true diff --git a/playbooks/roles/extra_volumes/tasks/providers/azure.yml b/playbooks/roles/extra_volumes/tasks/providers/azure.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/playbooks/roles/extra_volumes/tasks/providers/azure.yml @@ -0,0 +1 @@ +--- diff --git a/playbooks/roles/extra_volumes/tasks/providers/gce.yml b/playbooks/roles/extra_volumes/tasks/providers/gce.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/playbooks/roles/extra_volumes/tasks/providers/gce.yml @@ -0,0 +1 @@ +--- diff --git a/playbooks/roles/extra_volumes/tasks/providers/oci.yml b/playbooks/roles/extra_volumes/tasks/providers/oci.yml new file mode 100644 index 000000000..ed97d539c --- /dev/null +++ b/playbooks/roles/extra_volumes/tasks/providers/oci.yml @@ -0,0 +1 @@ +--- diff --git a/playbooks/roles/extra_volumes/templates/99-aws-ebs.rules.j2 b/playbooks/roles/extra_volumes/templates/99-aws-ebs.rules.j2 new file mode 100644 index 000000000..396bc8fd2 --- /dev/null +++ b/playbooks/roles/extra_volumes/templates/99-aws-ebs.rules.j2 @@ -0,0 +1 @@ +ACTION=="add|change", KERNEL=="nvme[0-9]*n[0-9]*", SUBSYSTEM=="block", PROGRAM=="/usr/local/bin/udev-ebs-tagger %k", SYMLINK+="%c" diff --git a/playbooks/roles/extra_volumes/templates/kdevops-disk.conf.j2 b/playbooks/roles/extra_volumes/templates/kdevops-disk.conf.j2 new 
file mode 100644 index 000000000..928fc96e5 --- /dev/null +++ b/playbooks/roles/extra_volumes/templates/kdevops-disk.conf.j2 @@ -0,0 +1,3 @@ +# kdevops extra volumes directory +# This directory must exist before udev creates symlinks to extra volumes +d /dev/disk/kdevops 0755 root root - - diff --git a/playbooks/roles/extra_volumes/templates/udev-ebs-tagger.j2 b/playbooks/roles/extra_volumes/templates/udev-ebs-tagger.j2 new file mode 100644 index 000000000..ff0d0d4e1 --- /dev/null +++ b/playbooks/roles/extra_volumes/templates/udev-ebs-tagger.j2 @@ -0,0 +1,26 @@ +#!/bin/bash + +DEVICE="$1" + +# Ensure the target directory exists +mkdir -p /dev/disk/kdevops + +# Get volume ID from device serial; exit non-zero when there is none so the udev rule does not match +VOLUME_ID=$(/usr/bin/lsblk -no SERIAL "/dev/$DEVICE" 2>/dev/null | head -1) +if [[ -z "$VOLUME_ID" || "$VOLUME_ID" == "null" ]]; then + exit 1 +fi + +# Static mapping from Terraform outputs +case "$VOLUME_ID" in +{% for tags, volume_id in volume_mapping.items() %} + "{{ volume_id.replace("-", "") }}") + SYMLINKS="disk/kdevops/{{ tags }} " + echo "$SYMLINKS" | sed 's/[[:space:]]*$//' + ;; +{% endfor %} + *) + # Unknown volume: exit non-zero so the udev rule does not match + exit 1 + ;; +esac diff --git a/scripts/terraform.Makefile b/scripts/terraform.Makefile index ef11e0e39..c23fe58a4 100644 --- a/scripts/terraform.Makefile +++ b/scripts/terraform.Makefile @@ -214,6 +214,9 @@ $(KDEVOPS_PROVISIONED_SSH): baseline:dev:service \ -m wait_for_connection + $(Q)ansible-playbook $(ANSIBLE_VERBOSE) \ + -i hosts playbooks/extra_volumes.yml \ + --extra-vars=@./extra_vars.yaml $(Q)touch $(KDEVOPS_PROVISIONED_SSH) status_terraform: $(Q)ansible-playbook $(ANSIBLE_VERBOSE) \ diff --git a/terraform/aws/kdevops_ebs_volumes/main.tf b/terraform/aws/kdevops_ebs_volumes/main.tf index 982f9b9d9..024a75411 100644 --- a/terraform/aws/kdevops_ebs_volumes/main.tf +++ b/terraform/aws/kdevops_ebs_volumes/main.tf @@ -11,6 +11,10 @@ resource "aws_ebs_volume" "kdevops_volume" { size = var.vol_size throughput = var.vol_throughput type = var.vol_type + + 
tags = { + FixedName = format("extra-volume-%02d", count.index) + } } resource "aws_volume_attachment" "kdevops_attachment" { diff --git a/terraform/aws/kdevops_ebs_volumes/outputs.tf b/terraform/aws/kdevops_ebs_volumes/outputs.tf index a8b1f8d22..226886457 100644 --- a/terraform/aws/kdevops_ebs_volumes/outputs.tf +++ b/terraform/aws/kdevops_ebs_volumes/outputs.tf @@ -3,3 +3,9 @@ output "ebs_volume_map" { value = zipmap(aws_volume_attachment.kdevops_attachment[*].device_name, aws_volume_attachment.kdevops_attachment[*].volume_id) } + +output "extra_volumes_tags" { + description = "Tag to volume ID mappings for udev configuration" + value = zipmap(aws_ebs_volume.kdevops_volume[*].tags["FixedName"], + aws_volume_attachment.kdevops_attachment[*].volume_id) +} diff --git a/terraform/aws/output.tf b/terraform/aws/output.tf index b41502689..4b7bb9bfd 100644 --- a/terraform/aws/output.tf +++ b/terraform/aws/output.tf @@ -14,3 +14,9 @@ output "block_device_map" { value = zipmap(var.kdevops_nodes[*], module.kdevops_ebs_volumes[*].ebs_volume_map) } + +output "extra_volumes_map" { + description = "Tag to volume ID mappings for udev configuration" + value = zipmap(var.kdevops_nodes[*], + module.kdevops_ebs_volumes[*].extra_volumes_tags) +} From f794a86fa90ef1115d90df18deb77055edb6d029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Aug 2025 11:03:51 -0400 Subject: [PATCH 2/4] terraform: Add fixed storage pathnames for all providers Now that all cloud providers have consistent device pathnames for the extra volumes created by terraform, we can make setting the "data" and "sparse" device names consistent as well. These symbolic constants can then be used by the volume_group and devconfig playbooks and the fstests workflow to reliably locate their block device resources on test runners in the cloud. Follow the design of the OCI provider and expose the device pathnames for the data and sparse device so these pathnames can be used reliably by other kdevops subsystems. 
Note that the AWS data volume default, which was hidden in the Kconfig menu, changes from "/dev/nvme1n1" to "/dev/disk/kdevops/extra-volume-00." Existing AWS configurations might break if they depend on the old pathname. Also, "make menuconfig" by itself does not change the existing value of TERRAFORM_AWS_DATA_VOLUME_DEVICE_FILE_NAME to the new string. Users must make that adjustment manually in existing defconfig files. Signed-off-by: Chuck Lever --- terraform/aws/kconfigs/Kconfig.storage | 17 +++++++++++++++-- terraform/azure/kconfigs/Kconfig.storage | 16 +++++++++++++--- terraform/gce/kconfigs/Kconfig.storage | 14 +++++++++++++- terraform/oci/kconfigs/Kconfig.storage | 7 +++++-- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/terraform/aws/kconfigs/Kconfig.storage b/terraform/aws/kconfigs/Kconfig.storage index 41daa0a39..f65504abd 100644 --- a/terraform/aws/kconfigs/Kconfig.storage +++ b/terraform/aws/kconfigs/Kconfig.storage @@ -203,5 +203,18 @@ config TERRAFORM_AWS_EBS_VOLUME_SIZE default 4096 if TERRAFORM_AWS_EBS_VOLUME_SIZE_4096G config TERRAFORM_AWS_DATA_VOLUME_DEVICE_FILE_NAME - string - default "/dev/nvme1n1" + string "Data volume's device file name" + output yaml + default "/dev/disk/kdevops/extra-volume-00" + help + This option sets the name of the block device on each + target node that is to be used for the /data file system. + +config TERRAFORM_AWS_SPARSE_VOLUME_DEVICE_FILE_NAME + string "Sparse volume's device file name" + output yaml + default "/dev/disk/kdevops/extra-volume-01" + help + This option sets the name of the block device on each + target node that can be used for the fstests sparse device + tests. 
diff --git a/terraform/azure/kconfigs/Kconfig.storage b/terraform/azure/kconfigs/Kconfig.storage index d4885163a..0ac5e023f 100644 --- a/terraform/azure/kconfigs/Kconfig.storage +++ b/terraform/azure/kconfigs/Kconfig.storage @@ -212,8 +212,18 @@ config TERRAFORM_AZURE_MANAGED_DISKS_TIER default "P50" if TERRAFORM_AZURE_MANAGED_DISKS_TIER_P50 config TERRAFORM_AZURE_DATA_VOLUME_DEVICE_FILE_NAME - string "Device name for the /data file system" + string "Data volume's device file name" + output yaml default "/dev/disk/azure/scsi1/lun0" help - This option sets the name of the block device on each target - node that is to be used for the /data file system. + This option sets the name of the block device on each + target node that is to be used for the /data file system. + +config TERRAFORM_AZURE_SPARSE_VOLUME_DEVICE_FILE_NAME + string "Sparse volume's device file name" + output yaml + default "/dev/disk/azure/scsi1/lun1" + help + This option sets the name of the block device on each + target node that can be used for the fstests sparse device + tests. diff --git a/terraform/gce/kconfigs/Kconfig.storage b/terraform/gce/kconfigs/Kconfig.storage index 717f50dfe..1d5690dcb 100644 --- a/terraform/gce/kconfigs/Kconfig.storage +++ b/terraform/gce/kconfigs/Kconfig.storage @@ -206,6 +206,18 @@ config TERRAFORM_GCE_DISK_THROUGHPUT depends on TERRAFORM_GCE_DISK_NEEDS_THROUGHPUT config TERRAFORM_GCE_DATA_VOLUME_DEVICE_FILE_NAME - string + string "Data volume's device file name" output yaml default "/dev/disk/by-id/google-persistent-disk-1" + help + This option sets the name of the block device on each + target node that is to be used for the /data file system. + +config TERRAFORM_GCE_SPARSE_VOLUME_DEVICE_FILE_NAME + string "Sparse volume's device file name" + output yaml + default "/dev/disk/by-id/google-persistent-disk-2" + help + This option sets the name of the block device on each + target node that can be used for the fstests sparse device + tests. 
diff --git a/terraform/oci/kconfigs/Kconfig.storage b/terraform/oci/kconfigs/Kconfig.storage index 15a2a05bb..f378e0b87 100644 --- a/terraform/oci/kconfigs/Kconfig.storage +++ b/terraform/oci/kconfigs/Kconfig.storage @@ -259,11 +259,14 @@ config TERRAFORM_OCI_DATA_VOLUME_DEVICE_FILE_NAME output yaml default "/dev/oracleoci/oraclevdb" help - Data volume's device file name + This option sets the name of the block device on each + target node that is to be used for the /data file system. config TERRAFORM_OCI_SPARSE_VOLUME_DEVICE_FILE_NAME string "Sparse volume's device file name" output yaml default "/dev/oracleoci/oraclevdc" help - Sparse volume's device file name + This option sets the name of the block device on each + target node that can be used for the fstests sparse device + tests. From 2f730a863ae80c34a1b9a6136bb195e36854f988 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Aug 2025 22:07:33 -0400 Subject: [PATCH 3/4] fstests: Better defaults for the fstests sparse device filename The cloud provider settings now consistently provide the correct device pathnames for the /data and sparse devices. Set up the default pathname for the fstests sparse device properly for those cases. 
Signed-off-by: Chuck Lever --- workflows/fstests/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workflows/fstests/Kconfig b/workflows/fstests/Kconfig index a878b932c..9afa03cf8 100644 --- a/workflows/fstests/Kconfig +++ b/workflows/fstests/Kconfig @@ -539,13 +539,13 @@ config FSTESTS_SPARSE_DEV default "/dev/disk/by-id/virtio-kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_VIRTIO default "/dev/disk/by-id/ata-QEMU_HARDDISK_kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_IDE default "/dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_SCSI - default "/dev/nvme2n1" if TERRAFORM_AWS_INSTANCE_M5AD_4XLARGE - default "/dev/nvme1n1" if TERRAFORM_GCE - default "/dev/sdd" if TERRAFORM_AZURE + default TERRAFORM_AWS_SPARSE_VOLUME_DEVICE_FILE_NAME if TERRAFORM_AWS + default TERRAFORM_GCE_SPARSE_VOLUME_DEVICE_FILE_NAME if TERRAFORM_GCE + default TERRAFORM_AZURE_SPARSE_VOLUME_DEVICE_FILE_NAME if TERRAFORM_AZURE default TERRAFORM_OCI_SPARSE_VOLUME_DEVICE_FILE_NAME if TERRAFORM_OCI help - The device to use to create a filesystem on where we will place - sparse files. + The device pathname to use to create a filesystem where + fstests places sparse files. choice prompt "Filesystem to use where sparse files will live" From 9d6332ca3121369cfc1250931b555c2f800ed3c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Aug 2025 11:29:01 -0400 Subject: [PATCH 4/4] volume_group: Use cloud provider's default data disk Thanks to recent commits, the volume_groups role can now use {{ data_device }} for AWS just like it does for other cloud providers. 
Signed-off-by: Chuck Lever --- playbooks/roles/volume_group/tasks/terraform/aws.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/playbooks/roles/volume_group/tasks/terraform/aws.yml b/playbooks/roles/volume_group/tasks/terraform/aws.yml index ee7bfa06f..e26776407 100644 --- a/playbooks/roles/volume_group/tasks/terraform/aws.yml +++ b/playbooks/roles/volume_group/tasks/terraform/aws.yml @@ -28,14 +28,15 @@ register: terraform_output changed_when: false -# FIXME: Stuff "/dev/sdf" into the data_device variable for AWS +# NOTE(review): the map keys look like EBS attachment names ("/dev/sdf"), not /dev/disk/kdevops/ paths; confirm what data_device holds. - name: Exclude the device that will house the /data file system vars: block_device_dict: "{{ terraform_output.stdout | from_json }}" local_map: "{{ block_device_dict[inventory_hostname] }}" ansible.builtin.set_fact: ebs_volume_ids: "{{ ebs_volume_ids + ['nvme-Amazon_Elastic_Block_Store_' + item.value | regex_replace('-', '')] }}" - when: item.key != "/dev/sdf" + when: + item.key not in [data_device, "/dev/sdf"] with_dict: "{{ local_map }}" - name: Add unused EBS volumes to the volume list