From 2a52d17568ebf8bb68347ad761e09169966779cb Mon Sep 17 00:00:00 2001 From: mrveiss Date: Thu, 19 Mar 2026 13:07:24 +0200 Subject: [PATCH 1/3] fix(slm): add ansible_name column to Node for Ansible targeting (#1814) Node.hostname stores display names (e.g., '00-SLM-Manager') which break Ansible --limit operations. This adds an explicit ansible_name column for Ansible inventory host targeting. The reconciler now uses ansible_name (with ip_address fallback) instead of hostname when executing playbooks. --- autobot-slm-backend/api/nodes.py | 2 ++ .../migrations/add_node_ansible_name.py | 29 +++++++++++++++++++ autobot-slm-backend/models/database.py | 1 + autobot-slm-backend/models/schemas.py | 3 ++ autobot-slm-backend/services/reconciler.py | 13 +++++---- 5 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 autobot-slm-backend/migrations/add_node_ansible_name.py diff --git a/autobot-slm-backend/api/nodes.py b/autobot-slm-backend/api/nodes.py index c1708fd73..da595e081 100644 --- a/autobot-slm-backend/api/nodes.py +++ b/autobot-slm-backend/api/nodes.py @@ -432,6 +432,7 @@ async def create_node( node = Node( node_id=node_id, hostname=node_data.hostname, + ansible_name=node_data.ansible_name, # Issue #1814 ip_address=node_data.ip_address, roles=node_data.roles, ssh_user=node_data.ssh_user, @@ -1395,6 +1396,7 @@ async def replace_node( new_node = Node( node_id=new_node_id, hostname=node_data.hostname, + ansible_name=node_data.ansible_name, # Issue #1814 ip_address=node_data.ip_address, roles=node_data.roles, ssh_user=node_data.ssh_user, diff --git a/autobot-slm-backend/migrations/add_node_ansible_name.py b/autobot-slm-backend/migrations/add_node_ansible_name.py new file mode 100644 index 000000000..6ae00b9b9 --- /dev/null +++ b/autobot-slm-backend/migrations/add_node_ansible_name.py @@ -0,0 +1,29 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Migration: Add ansible_name column to nodes table (#1814). + +The hostname column stores user-facing display names (e.g., '00-SLM-Manager') +which are unsuitable for Ansible --limit and SSH operations. This adds an +explicit ansible_name column for Ansible inventory host targeting, with +ip_address fallback for nodes where it is not set. +""" + +import logging + +from migrations.utils import add_column_if_not_exists, get_connection + +logger = logging.getLogger(__name__) + + +def migrate(db_url: str) -> None: + """Add ansible_name column to nodes table (#1814).""" + conn = get_connection(db_url) + cursor = conn.cursor() + + add_column_if_not_exists(cursor, "nodes", "ansible_name", "VARCHAR(255)") + + conn.commit() + conn.close() + logger.info("Migration: added ansible_name column to nodes") diff --git a/autobot-slm-backend/models/database.py b/autobot-slm-backend/models/database.py index ed6472d5a..dbb807f26 100644 --- a/autobot-slm-backend/models/database.py +++ b/autobot-slm-backend/models/database.py @@ -96,6 +96,7 @@ class Node(Base): id = Column(Integer, primary_key=True, autoincrement=True) node_id = Column(String(64), unique=True, nullable=False, index=True) hostname = Column(String(255), nullable=False) + ansible_name = Column(String(255), nullable=True) # Issue #1814 ip_address = Column(String(45), nullable=False) status = Column(String(20), default=NodeStatus.PENDING.value) roles = Column(JSON, default=list) diff --git a/autobot-slm-backend/models/schemas.py b/autobot-slm-backend/models/schemas.py index a9f0a7938..7b0f49fea 100644 --- a/autobot-slm-backend/models/schemas.py +++ b/autobot-slm-backend/models/schemas.py @@ -136,6 +136,7 @@ class NodeCreate(BaseModel): """Node registration request.""" hostname: str + ansible_name: Optional[str] = None # Ansible inventory name (#1814) ip_address: str node_id: Optional[ str @@ -153,6 +154,7 @@ class NodeUpdate(BaseModel): """Node update request.""" hostname: Optional[str] = None + ansible_name: Optional[str] = None # Ansible inventory name (#1814) ip_address: Optional[str] = None status: Optional[NodeStatus] = None roles: Optional[List[str]] = None @@ -164,6 +166,7 @@ class NodeResponse(BaseModel): id: int node_id: str hostname: str + ansible_name: Optional[str] = None # Ansible inventory name (#1814) ip_address: str status: str roles: Optional[List[str]] = [] diff --git a/autobot-slm-backend/services/reconciler.py b/autobot-slm-backend/services/reconciler.py index 8bce64b60..56856e393 100644 --- a/autobot-slm-backend/services/reconciler.py +++ b/autobot-slm-backend/services/reconciler.py @@ -16,6 +16,7 @@ from datetime import datetime, timedelta from typing import Dict, List, Optional +from config import settings from models.database import ( Deployment, DeploymentStatus, @@ -33,8 +34,6 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from config import settings - logger = logging.getLogger(__name__) # Role to systemd service mapping @@ -471,9 +470,10 @@ async def _remediate_node(self, db: AsyncSession, node: Node) -> bool: message=f"Attempting to restart SLM agent on {node.hostname}", ) - # Try to restart the SLM agent via Ansible + # Try to restart the SLM agent via Ansible (#1814: prefer ansible_name) + ansible_target = node.ansible_name or node.ip_address success = await self._restart_service_via_ansible( - node.hostname, + ansible_target, "slm-agent", ) @@ -757,9 +757,10 @@ async def _remediate_failed_service( message=f"Attempting to restart {service.service_name} on {node.hostname}", ) - # Try to restart via Ansible + # Try to restart via Ansible (#1814: prefer ansible_name) + ansible_target = node.ansible_name or node.ip_address success = await self._restart_service_via_ansible( - node.hostname, + ansible_target, service.service_name, ) From cbecfbc55e020fa3961af71f4a1252b34649d1dc Mon Sep 17 00:00:00 2001 From: mrveiss Date: Thu, 19 Mar 2026 15:27:18 +0200 Subject: [PATCH 2/3] fix(slm): register migration and fix remaining hostname callers (#1814) Code review found: migration was not registered in runner.py, and services.py, tls.py, setup_wizard.py still passed node.hostname to Ansible --limit. All callers now use ansible_name with IP fallback. --- autobot-slm-backend/api/services.py | 3 ++- autobot-slm-backend/api/setup_wizard.py | 3 ++- autobot-slm-backend/api/tls.py | 3 ++- autobot-slm-backend/migrations/runner.py | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autobot-slm-backend/api/services.py b/autobot-slm-backend/api/services.py index 948bbaf23..7837c1e17 100644 --- a/autobot-slm-backend/api/services.py +++ b/autobot-slm-backend/api/services.py @@ -375,9 +375,10 @@ async def scan_node_services( from services.playbook_executor import get_playbook_executor executor = get_playbook_executor() + ansible_target = node.ansible_name or node.ip_address # #1814 result = await executor.execute_playbook( playbook_name="discover-services.yml", - limit=[node.hostname], + limit=[ansible_target], ) if not result.get("success"): diff --git a/autobot-slm-backend/api/setup_wizard.py b/autobot-slm-backend/api/setup_wizard.py index 94ccf15bb..fd942f0eb 100644 --- a/autobot-slm-backend/api/setup_wizard.py +++ b/autobot-slm-backend/api/setup_wizard.py @@ -184,7 +184,8 @@ async def _generate_dynamic_inventory( } if node.ssh_port and node.ssh_port != 22: host_vars["ansible_port"] = node.ssh_port - hosts[node.hostname] = host_vars + inventory_name = node.ansible_name or node.ip_address # #1814 + hosts[inventory_name] = host_vars node_id_to_hostname[node.node_id] = node.hostname node_id_to_ip[node.node_id] = node.ip_address diff --git a/autobot-slm-backend/api/tls.py b/autobot-slm-backend/api/tls.py index 61476c781..d41526ca3 100644 --- a/autobot-slm-backend/api/tls.py +++ b/autobot-slm-backend/api/tls.py @@ -1017,9 +1017,10 @@ async def _execute_cert_deployment( if chain_path: extra_vars["chain_file"] = chain_path + ansible_target = node.ansible_name or node.ip_address # #1814 result = await executor.execute_playbook( playbook_name="deploy-certificate.yml", - limit=[node.hostname], + limit=[ansible_target], extra_vars=extra_vars, ) diff --git a/autobot-slm-backend/migrations/runner.py b/autobot-slm-backend/migrations/runner.py index 90afbb9e4..ec3e1e4ab 100644 --- a/autobot-slm-backend/migrations/runner.py +++ b/autobot-slm-backend/migrations/runner.py @@ -54,6 +54,8 @@ # Issue #1900: consolidate slm_users (integer PK) into users (UUID PK) # and drop the now-orphaned slm_users table. "consolidate_slm_users_to_uuid", + # Issue #1814: add ansible_name column for proper Ansible targeting + "add_node_ansible_name", ] From 045c07b9ce500ff893c9055c5a4a64b255b4b8e3 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Thu, 19 Mar 2026 23:10:01 +0200 Subject: [PATCH 3/3] fix(slm): add ansible_target property, fix fact extraction key (#1814) - Add Node.ansible_target property (single source of truth for targeting) - Fix fact extraction key in services.py to match --limit value - Replace all inline ansible_name-or-ip patterns with the property --- autobot-slm-backend/api/services.py | 4 ++-- autobot-slm-backend/api/setup_wizard.py | 2 +- autobot-slm-backend/api/tls.py | 2 +- autobot-slm-backend/models/database.py | 5 +++++ autobot-slm-backend/services/reconciler.py | 4 ++-- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/autobot-slm-backend/api/services.py b/autobot-slm-backend/api/services.py index 7837c1e17..5c81a097e 100644 --- a/autobot-slm-backend/api/services.py +++ b/autobot-slm-backend/api/services.py @@ -375,7 +375,7 @@ async def scan_node_services( from services.playbook_executor import get_playbook_executor executor = get_playbook_executor() - ansible_target = node.ansible_name or node.ip_address # #1814 + ansible_target = node.ansible_target # #1814 result = await executor.execute_playbook( playbook_name="discover-services.yml", limit=[ansible_target], @@ -389,7 +389,7 @@ async def scan_node_services( # Extract service facts from Ansible result # service_facts returns ansible_facts.services as a dict services_data = ( - result.get("facts", {}).get(node.hostname, {}).get("services", {}) + result.get("facts", {}).get(ansible_target, {}).get("services", {}) ) if not services_data: diff --git a/autobot-slm-backend/api/setup_wizard.py b/autobot-slm-backend/api/setup_wizard.py index fd942f0eb..cf8b1aab7 100644 --- a/autobot-slm-backend/api/setup_wizard.py +++ b/autobot-slm-backend/api/setup_wizard.py @@ -184,7 +184,7 @@ async def _generate_dynamic_inventory( } if node.ssh_port and node.ssh_port != 22: host_vars["ansible_port"] = node.ssh_port - inventory_name = node.ansible_name or node.ip_address # #1814 + inventory_name = node.ansible_target # #1814 hosts[inventory_name] = host_vars node_id_to_hostname[node.node_id] = node.hostname node_id_to_ip[node.node_id] = node.ip_address diff --git a/autobot-slm-backend/api/tls.py b/autobot-slm-backend/api/tls.py index d41526ca3..a24b5d757 100644 --- a/autobot-slm-backend/api/tls.py +++ b/autobot-slm-backend/api/tls.py @@ -1017,7 +1017,7 @@ async def _execute_cert_deployment( if chain_path: extra_vars["chain_file"] = chain_path - ansible_target = node.ansible_name or node.ip_address # #1814 + ansible_target = node.ansible_target # #1814 result = await executor.execute_playbook( playbook_name="deploy-certificate.yml", limit=[ansible_target], diff --git a/autobot-slm-backend/models/database.py b/autobot-slm-backend/models/database.py index dbb807f26..f2b00ba8a 100644 --- a/autobot-slm-backend/models/database.py +++ b/autobot-slm-backend/models/database.py @@ -130,6 +130,11 @@ class Node(Base): created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + @property + def ansible_target(self) -> str: + """Best identifier for Ansible --limit targeting (#1814).""" + return self.ansible_name or self.ip_address + class Deployment(Base): """Deployment model for tracking role deployments.""" diff --git a/autobot-slm-backend/services/reconciler.py b/autobot-slm-backend/services/reconciler.py index 56856e393..a66729d9e 100644 --- a/autobot-slm-backend/services/reconciler.py +++ b/autobot-slm-backend/services/reconciler.py @@ -471,7 +471,7 @@ async def _remediate_node(self, db: AsyncSession, node: Node) -> bool: ) # Try to restart the SLM agent via Ansible (#1814: prefer ansible_name) - ansible_target = node.ansible_name or node.ip_address + ansible_target = node.ansible_target success = await self._restart_service_via_ansible( ansible_target, "slm-agent", @@ -758,7 +758,7 @@ async def _remediate_failed_service( ) # Try to restart via Ansible (#1814: prefer ansible_name) - ansible_target = node.ansible_name or node.ip_address + ansible_target = node.ansible_target success = await self._restart_service_via_ansible( ansible_target, service.service_name,