From 5aaf9063a31ff771202939c14b5dbdf8a3af96da Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 10 Dec 2025 15:50:16 +0000 Subject: [PATCH 1/9] use ansible-init user for cluster share --- ansible/roles/cacerts/tasks/export.yml | 6 +- .../roles/compute_init/files/compute-init.yml | 4 +- ansible/roles/compute_init/tasks/export.yml | 68 +++++++++---------- .../templates/os_manila_mount_share_info.j2 | 1 + ansible/roles/nhc/tasks/export.yml | 4 +- ansible/roles/sssd/tasks/export.yml | 4 +- .../inventory/group_vars/all/defaults.yml | 12 ++++ .../common/inventory/group_vars/all/nfs.yml | 1 + 8 files changed, 55 insertions(+), 45 deletions(-) create mode 100644 ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 diff --git a/ansible/roles/cacerts/tasks/export.yml b/ansible/roles/cacerts/tasks/export.yml index 8e036a156..f3070627a 100644 --- a/ansible/roles/cacerts/tasks/export.yml +++ b/ansible/roles/cacerts/tasks/export.yml @@ -3,9 +3,9 @@ ansible.builtin.copy: src: "{{ item }}" dest: /exports/cluster/cacerts/ - owner: slurm - group: root - mode: "0644" + owner: ansible-init + group: ansible-init + mode: u=rw,go= with_fileglob: - "{{ cacerts_cert_dir }}/*" delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 91eaa86c1..059f0a458 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -83,7 +83,7 @@ - ansible.builtin.meta: end_play - name: Check if hostvars exist become: true - become_user: slurm + become_user: ansible-init # share is root-squashed ansible.builtin.stat: path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" register: hostvars_stat @@ -98,7 +98,7 @@ - ansible.builtin.meta: end_play - name: Sync /mnt/cluster to /var/tmp become: true - become_user: slurm + become_user: ansible-init # share is root-squashed ansible.posix.synchronize: src: 
"/mnt/cluster/" dest: "/var/tmp/cluster/" diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml index caf0cdc67..6be600c78 100644 --- a/ansible/roles/compute_init/tasks/export.yml +++ b/ansible/roles/compute_init/tasks/export.yml @@ -1,66 +1,60 @@ --- -- name: Ensure the /exports/cluster directory exists +- name: Ensure /exports/cluster directory structure exists ansible.builtin.file: - path: /exports/cluster + path: "{{ item }}" state: directory - owner: slurm - group: root + owner: ansible-init + group: ansible-init mode: u=rX,g=rwX,o= run_once: true + loop: + - /exports/cluster + - /exports/cluster/hostvars + - /exports/cluster/cacerts + - /exports/cluster/cvmfs + - /exports/cluster/hostconfig + delegate_to: "{{ groups['control'] | first }}" - name: Copy /etc/hosts to /exports/cluster ansible.builtin.copy: src: /etc/hosts dest: /exports/cluster/hosts - owner: slurm - group: root - mode: u=r,g=rw,o= + owner: ansible-init + group: ansible-init + mode: u=rw,go=r remote_src: true run_once: true delegate_to: "{{ groups['control'] | first }}" -- name: Create hostvars directory +- name: Create per-host hostvars directory ansible.builtin.file: path: /exports/cluster/hostvars/{{ inventory_hostname }}/ state: directory - owner: slurm - group: root - mode: u=rX,g=rwX,o= + owner: ansible-init + group: ansible-init + mode: u=rwX,go= delegate_to: "{{ groups['control'] | first }}" - name: Template out hostvars ansible.builtin.template: src: hostvars.yml.j2 dest: /exports/cluster/hostvars/{{ inventory_hostname }}/hostvars.yml - owner: slurm - group: root - mode: u=r,g=rw,o= + owner: ansible-init + group: ansible-init + mode: u=rw,go= delegate_to: "{{ groups['control'] | first }}" -- name: Copy manila share info to /exports/cluster - ansible.builtin.copy: - content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}" +- name: Template manila share info to /exports/cluster + ansible.builtin.template: + src: os_manila_mount_share_info.j2 dest: 
/exports/cluster/manila_share_info.yml - owner: slurm - group: root - mode: u=r,g=rw,o= + owner: ansible-init + group: ansible-init + mode: u=rw,go= run_once: true delegate_to: "{{ groups['control'] | first }}" when: os_manila_mount_share_info is defined - vars: - os_manila_mount_share_info_var: - os_manila_mount_share_info: "{{ os_manila_mount_share_info }}" - -- name: Ensure /exports/cluster/cvmfs directory exists - ansible.builtin.file: - path: /exports/cluster/cvmfs - state: directory - owner: slurm - group: root - mode: "0755" - run_once: true - delegate_to: "{{ groups['control'] | first }}" - name: Export cacerts ansible.builtin.include_role: @@ -68,13 +62,13 @@ tasks_from: export.yml when: "'cacerts' in group_names" -- name: Create hostconfig directory +- name: Create per-host hostconfig directory ansible.builtin.file: path: "/exports/cluster/hostconfig/{{ inventory_hostname }}/" state: directory - owner: slurm - group: root - mode: u=rX,g=rwX,o= + owner: ansible-init + group: ansible-init + mode: u=rwX,go= delegate_to: "{{ groups['control'] | first }}" - name: Template sssd config diff --git a/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 b/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 new file mode 100644 index 000000000..b47dec719 --- /dev/null +++ b/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 @@ -0,0 +1 @@ +{{ os_manila_mount_share_info_var | to_nice_yaml }} diff --git a/ansible/roles/nhc/tasks/export.yml b/ansible/roles/nhc/tasks/export.yml index d6b1120e4..c080fdd3e 100644 --- a/ansible/roles/nhc/tasks/export.yml +++ b/ansible/roles/nhc/tasks/export.yml @@ -3,5 +3,7 @@ ansible.builtin.template: src: "{{ nhc_config_template }}" dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/nhc.conf" - mode: "0644" + owner: ansible-init + group: ansible-init + mode: u=rw,go= delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/sssd/tasks/export.yml 
b/ansible/roles/sssd/tasks/export.yml index 607878648..3d95a0598 100644 --- a/ansible/roles/sssd/tasks/export.yml +++ b/ansible/roles/sssd/tasks/export.yml @@ -4,7 +4,7 @@ ansible.builtin.template: src: "{{ sssd_conf_src }}" dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/sssd.conf" - owner: root - group: root + owner: ansible-init + group: ansible-init mode: u=rw,go= delegate_to: "{{ groups['control'] | first }}" diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index e9852afd6..4ab4d30df 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -77,6 +77,18 @@ appliances_local_users_default: shell: /sbin/nologin system: true enable: "{{ 'grafana' in group_names }}" + + - group: + name: ansible-init + gid: 301 + user: + name: ansible-init + comment: ansible-init user + uid: 301 + create_home: false + shell: /sbin/nologin + system: true + enable: "{{ 'ansible_init' in group_names }}" # Overide this to add extra users whilst keeping the defaults. 
appliances_local_users_extra: [] # see format of appliances_local_users_default above diff --git a/environments/common/inventory/group_vars/all/nfs.yml b/environments/common/inventory/group_vars/all/nfs.yml index 05ecd8900..76aa63ff0 100644 --- a/environments/common/inventory/group_vars/all/nfs.yml +++ b/environments/common/inventory/group_vars/all/nfs.yml @@ -31,6 +31,7 @@ nfs_configuration_compute_nodes: # cluster configuration for compute_init/slurm- server: "{{ inventory_hostname in groups['control'] }}" clients: false nfs_export: "/exports/cluster" + nfs_export_options: "ro,secure,root_squash" nfs_configurations_extra: [] # site-specific nfs shares From 16d60ee654d5d9149cf4764ee18416e058c2cb4f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 11 Dec 2025 13:59:14 +0000 Subject: [PATCH 2/9] fix pointless linter errors --- ansible/roles/compute_init/tasks/export.yml | 1 - environments/common/inventory/group_vars/all/defaults.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml index 6be600c78..b141ddd13 100644 --- a/ansible/roles/compute_init/tasks/export.yml +++ b/ansible/roles/compute_init/tasks/export.yml @@ -13,7 +13,6 @@ - /exports/cluster/cacerts - /exports/cluster/cvmfs - /exports/cluster/hostconfig - delegate_to: "{{ groups['control'] | first }}" - name: Copy /etc/hosts to /exports/cluster diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 4ab4d30df..b69858499 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -77,7 +77,7 @@ appliances_local_users_default: shell: /sbin/nologin system: true enable: "{{ 'grafana' in group_names }}" - + - group: name: ansible-init gid: 301 From 7ef831d0fddcd8a6bacd24b8e33cffa828d2f600 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 
12 Dec 2025 11:07:40 +0000 Subject: [PATCH 3/9] bump CI image --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 04730140e..ed77a2455 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251211-0951-51b93e3f", - "RL9": "openhpc-RL9-251211-0951-51b93e3f" + "RL8": "openhpc-RL8-251212-1030-1622e1fa", + "RL9": "openhpc-RL9-251212-1031-1622e1fa" } } From d12e658aad98a04b9251e4ddca270fddc38264f0 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 12 Dec 2025 15:41:23 +0000 Subject: [PATCH 4/9] fix ansible-init gid --- environments/common/inventory/group_vars/all/defaults.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index b69858499..74fbd2c8f 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -78,10 +78,7 @@ appliances_local_users_default: system: true enable: "{{ 'grafana' in group_names }}" - - group: - name: ansible-init - gid: 301 - user: + - user: name: ansible-init comment: ansible-init user uid: 301 From e7854c1e036b1338905c913b5a7d44839b4d14a5 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 12 Dec 2025 17:48:35 +0000 Subject: [PATCH 5/9] bump CI image --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index ed77a2455..42ff42e3f 100644 --- 
a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251212-1030-1622e1fa", - "RL9": "openhpc-RL9-251212-1031-1622e1fa" + "RL8": "openhpc-RL8-251212-1638-d12e658a", + "RL9": "openhpc-RL9-251212-1638-d12e658a" } } From bce36cd5fe7263550e5e04d26ae300f99b79224e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 12 Dec 2025 19:56:01 +0000 Subject: [PATCH 6/9] try to workaround user creation hanging in-cluster --- environments/common/inventory/group_vars/all/defaults.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 74fbd2c8f..66e4088b0 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -83,6 +83,7 @@ appliances_local_users_default: comment: ansible-init user uid: 301 create_home: false + home: /var/lib/ansible-init shell: /sbin/nologin system: true enable: "{{ 'ansible_init' in group_names }}" From 6bd79088b5ba4dfb9dbadd8a60421eb429c8385b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 12 Dec 2025 20:52:01 +0000 Subject: [PATCH 7/9] bump CI image --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 42ff42e3f..9be3f2a06 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251212-1638-d12e658a", - "RL9": "openhpc-RL9-251212-1638-d12e658a" + "RL8": "openhpc-RL8-251212-2014-bce36cd5", + "RL9": "openhpc-RL9-251212-2014-bce36cd5" } } From 312737665511b00b502ea4a3e70de597cdb59dbc 
Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Sat, 13 Dec 2025 11:28:23 +0000 Subject: [PATCH 8/9] revert manila changes --- ansible/roles/compute_init/tasks/export.yml | 9 ++++++--- .../templates/os_manila_mount_share_info.j2 | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) delete mode 100644 ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml index b141ddd13..21e0ea4b5 100644 --- a/ansible/roles/compute_init/tasks/export.yml +++ b/ansible/roles/compute_init/tasks/export.yml @@ -44,16 +44,19 @@ mode: u=rw,go= delegate_to: "{{ groups['control'] | first }}" -- name: Template manila share info to /exports/cluster - ansible.builtin.template: - src: os_manila_mount_share_info.j2 +- name: Copy manila share info to /exports/cluster + ansible.builtin.copy: + content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}" dest: /exports/cluster/manila_share_info.yml owner: ansible-init group: ansible-init mode: u=rw,go= run_once: true delegate_to: "{{ groups['control'] | first }}" when: os_manila_mount_share_info is defined + vars: + os_manila_mount_share_info_var: + os_manila_mount_share_info: "{{ os_manila_mount_share_info }}" - name: Export cacerts ansible.builtin.include_role: diff --git a/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 b/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 deleted file mode 100644 index b47dec719..000000000 --- a/ansible/roles/compute_init/templates/os_manila_mount_share_info.j2 +++ /dev/null @@ -1 +0,0 @@ -{{ os_manila_mount_share_info_var | to_nice_yaml }} From abb8bd5d410979bc30121bc666337ad0b6209a5c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Sat, 13 Dec 2025 12:15:14 +0000 Subject: [PATCH 9/9] bump CI image --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 9be3f2a06..ceeb8080f 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251212-2014-bce36cd5", - "RL9": "openhpc-RL9-251212-2014-bce36cd5" + "RL8": "openhpc-RL8-251213-1133-31273766", + "RL9": "openhpc-RL9-251213-1133-31273766" } }