From b54cce66b9e4c17963e6db56a923cd231679eb91 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 2 Apr 2026 16:46:21 -0700
Subject: [PATCH 01/35] Added new feature to execute workflows without a
 Kubernetes cluster, for faster iteration when developing workflows

---
 src/cli/local.py                       |   72 ++
 src/cli/main_parser.py                 |    4 +-
 src/utils/BUILD                        |   10 +
 src/utils/local_executor.py            |  299 +++++
 src/utils/tests/BUILD                  |   10 +
 src/utils/tests/test_local_executor.py | 1406 ++++++++++++++++++++++++
 6 files changed, 1800 insertions(+), 1 deletion(-)
 create mode 100644 src/cli/local.py
 create mode 100644 src/utils/local_executor.py
 create mode 100644 src/utils/tests/test_local_executor.py

diff --git a/src/cli/local.py b/src/cli/local.py
new file mode 100644
index 000000000..a9f12eaaf
--- /dev/null
+++ b/src/cli/local.py
@@ -0,0 +1,72 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import argparse
+import sys
+
+import shtab
+
+from src.utils import local_executor
+
+
+def setup_parser(parser: argparse._SubParsersAction):
+    """Register the ``local`` command and its ``run`` subcommand on ``parser``."""
+    local_cmd = parser.add_parser(
+        'local', help='Run workflows locally using Docker (no Kubernetes cluster required).')
+    commands = local_cmd.add_subparsers(dest='command')
+    commands.required = True
+
+    run_cmd = commands.add_parser(
+        'run', help='Execute a workflow spec locally using Docker containers.')
+    spec_option = run_cmd.add_argument(
+        '-f', '--file',
+        dest='workflow_file',
+        required=True,
+        help='Path to the workflow YAML spec file.')
+    spec_option.complete = shtab.FILE
+    run_cmd.add_argument(
+        '--work-dir',
+        default=None,
+        dest='work_dir',
+        help='Directory for task inputs/outputs. Defaults to a temporary directory.')
+    run_cmd.add_argument(
+        '--keep',
+        default=False,
+        action='store_true',
+        help='Keep the work directory after execution (always kept on failure).')
+    run_cmd.add_argument(
+        '--docker',
+        default='docker',
+        dest='docker_cmd',
+        help='Docker-compatible command to use (e.g. podman). Default: docker.')
+    run_cmd.set_defaults(func=_run_local)
+
+
+def _run_local(service_client, args: argparse.Namespace):
+    """Run ``args.workflow_file`` locally; exit with status 1 on an error or a failed task."""
+    # NOTE(review): args.docker_cmd (--docker) is parsed but not forwarded below -- confirm.
+    try:
+        success = local_executor.run_workflow_locally(
+            spec_path=args.workflow_file, work_dir=args.work_dir, keep_work_dir=args.keep)
+    except (OSError, ValueError) as error:
+        # OSError: the spec file is missing or unreadable; ValueError: the spec was rejected.
+        print(f'Error: {error}', file=sys.stderr)
+        sys.exit(1)
+
+    if not success:
+        sys.exit(1)
diff --git a/src/cli/main_parser.py b/src/cli/main_parser.py
index 79484ee16..bd097111d 100644
--- a/src/cli/main_parser.py
+++ b/src/cli/main_parser.py
@@ -28,6 +28,7 @@
     credential,
     data,
     dataset,
+    local,
     login,
     pool,
     profile,
@@ -55,7 +56,8 @@
     profile.setup_parser,
     pool.setup_parser,
     user.setup_parser,
-    config.setup_parser
+    config.setup_parser,
+    local.setup_parser,
 )
 
 
diff --git a/src/utils/BUILD b/src/utils/BUILD
index 77f45aafb..8a29aa9af 100644
--- a/src/utils/BUILD
+++ b/src/utils/BUILD
@@ -126,3 +126,13 @@ osmo_py_library(
     ],
     visibility = ["//visibility:public"],
 )
+
+osmo_py_library(
+    name = "local_executor",  # Docker-based local workflow executor
+    srcs = ["local_executor.py"],
+    deps = [
+        requirement("pyyaml"),  # local_executor.py imports yaml
+        "//src/utils/job",  # presumably provides the src.utils.job modules imported there
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
new file mode 100644
index 000000000..c1926db34
--- /dev/null
+++ b/src/utils/local_executor.py
@@ -0,0 +1,299 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import dataclasses
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+from typing import Dict, List, Set
+
+import yaml
+
+from src.utils.job import task as task_module
+from src.utils.job import workflow as workflow_module
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class TaskNode:  # one workflow task plus its edges in the dependency graph
+    name: str
+    spec: task_module.TaskSpec
+    group: str  # name of the group the task was declared in
+    upstream: Set[str] = dataclasses.field(default_factory=set)  # tasks this one takes input from
+    downstream: Set[str] = dataclasses.field(default_factory=set)  # tasks that consume this one
+
+
+@dataclasses.dataclass
+class TaskResult:  # outcome recorded for a task that ran or was cancelled
+    name: str
+    exit_code: int  # process exit status; -1 marks a cancelled task, 127 a missing docker binary
+    output_dir: str  # host directory mounted as the task output; '' for cancelled tasks
+
+
+class LocalExecutor:
+    """
+    Executes an OSMO workflow spec locally using Docker, without Kubernetes.
+
+    Supports:
+      - Serial and parallel task DAGs (groups flattened; tasks run one at a time, in DAG order)
+      - {{output}} and {{input:N}} / {{input:taskname}} token substitution
+      - Inline `files:` written to the container
+      - `environment:` passed as Docker env vars
+      - Task-to-task data flow via shared local directories
+
+    Does NOT support (raises clear errors):
+      - Dataset / URL inputs/outputs (require object storage)
+      - Credentials, checkpoints, volumeMounts (require cluster infra)
+      - Templated specs with Jinja (require server-side expansion; use --dry-run first)
+    """
+
+    def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str = 'docker'):
+        self._work_dir = os.path.abspath(work_dir)  # "-v" bind mounts need absolute host paths
+        self._keep_work_dir = keep_work_dir
+        self._docker_cmd = docker_cmd
+        self._task_nodes: Dict[str, TaskNode] = {}  # task name -> DAG node
+        self._results: Dict[str, TaskResult] = {}  # task name -> result of a run or a cancel
+
+    def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
+        """Parse YAML spec text and return the workflow of the resulting versioned spec."""
+        parsed_document = yaml.safe_load(spec_text)
+        return workflow_module.VersionedWorkflowSpec(**parsed_document).workflow
+
+    def execute(self, spec: workflow_module.WorkflowSpec) -> bool:
+        """Run every task in dependency order; return True only if all of them exit with 0."""
+        self._build_dag(spec)
+        self._validate_for_local(spec)
+        self._setup_directories()
+
+        groups = self._groups(spec)
+        logger.info('Workflow "%s": %d task(s) across %d group(s)',
+                     spec.name, sum(len(g.tasks) for g in groups), len(groups))
+
+        ready = self._find_ready_tasks()
+        while ready:
+            for task_name in ready:
+                node = self._task_nodes[task_name]
+                logger.info('--- Running task: %s (image: %s) ---', task_name, node.spec.image)
+                result = self._run_task(node, spec)
+                self._results[task_name] = result
+                if result.exit_code != 0:
+                    logger.error('Task "%s" failed with exit code %d', task_name, result.exit_code)
+                    self._cancel_downstream(task_name)
+                    return False
+                logger.info('Task "%s" completed successfully', task_name)
+            ready = self._find_ready_tasks()
+
+        # Nothing is ready but some tasks never ran: a dependency cycle (or a task that
+        # depends on itself) would otherwise be reported as a successful run.
+        stalled = [name for name in self._task_nodes if name not in self._results]
+        if stalled:
+            logger.error('Workflow stalled; tasks that never ran: %s', ', '.join(stalled))
+            return False
+        logger.info('Workflow "%s" completed successfully', spec.name)
+        return True
+
+    def _groups(self, spec: workflow_module.WorkflowSpec) -> List[task_module.TaskGroupSpec]:
+        """Return spec.groups, or one single-task group per task when no groups are given."""
+        return spec.groups or [
+            task_module.TaskGroupSpec(name=task.name, tasks=[task]) for task in spec.tasks]
+
+    def _build_dag(self, spec: workflow_module.WorkflowSpec):
+        """Rebuild self._task_nodes from spec and link tasks through their task inputs.
+
+        Raises ValueError when an input names a task that is not part of the workflow.
+        """
+        self._task_nodes.clear()
+        groups = self._groups(spec)
+        for group in groups:
+            for task_spec in group.tasks:
+                self._task_nodes[task_spec.name] = TaskNode(
+                    name=task_spec.name, spec=task_spec, group=group.name)
+        # Second pass: edges can only be added once every task node exists.
+        for group in groups:
+            for task_spec in group.tasks:
+                for input_source in task_spec.inputs:
+                    if not isinstance(input_source, task_module.TaskInputOutput):
+                        continue
+                    upstream_task = input_source.task
+                    if upstream_task not in self._task_nodes:
+                        raise ValueError(
+                            f'Task "{task_spec.name}" depends on unknown task "{upstream_task}"')
+                    self._task_nodes[task_spec.name].upstream.add(upstream_task)
+                    self._task_nodes[upstream_task].downstream.add(task_spec.name)
+
+    def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
+        """Raise ValueError listing every feature of spec that local execution cannot provide."""
+        problems: List[str] = []
+        all_tasks = [task for group in self._groups(spec) for task in group.tasks]
+        for task_spec in all_tasks:
+            for input_source in task_spec.inputs:
+                if isinstance(input_source, task_module.DatasetInputOutput):
+                    problems.append(
+                        f'Task "{task_spec.name}": dataset inputs require object storage')
+                elif isinstance(input_source, task_module.URLInputOutput):
+                    problems.append(
+                        f'Task "{task_spec.name}": URL inputs require network/storage access')
+
+            for output in task_spec.outputs:
+                if isinstance(output, (task_module.DatasetInputOutput, task_module.URLInputOutput)):
+                    problems.append(
+                        f'Task "{task_spec.name}": dataset/URL outputs require object storage')
+
+            if task_spec.credentials:
+                problems.append(
+                    f'Task "{task_spec.name}": credentials require the OSMO secret manager')
+            if task_spec.checkpoint:
+                problems.append(
+                    f'Task "{task_spec.name}": checkpoints require object storage')
+            if task_spec.volumeMounts:
+                problems.append(
+                    f'Task "{task_spec.name}": volumeMounts require cluster-level host paths')
+
+        if not problems:
+            return
+        raise ValueError(
+            'The following features are not supported in local execution mode:\n  - '
+            + '\n  - '.join(problems))
+
+    def _setup_directories(self):
+        os.makedirs(self._work_dir, exist_ok=True)  # still created when there are no tasks
+        for task_name in self._task_nodes:  # one <work_dir>/<task>/output directory per task
+            os.makedirs(os.path.join(self._work_dir, task_name, 'output'), exist_ok=True)
+
+    def _find_ready_tasks(self) -> List[str]:
+        """List tasks with no recorded result whose upstream tasks all exited with code 0.
+
+        Order follows self._task_nodes; a missing or non-zero upstream result blocks a task.
+        """
+        results = self._results
+        return [
+            task_name for task_name, node in self._task_nodes.items()
+            if task_name not in results
+            and all(dep in results and results[dep].exit_code == 0 for dep in node.upstream)
+        ]
+
+    def _cancel_downstream(self, failed_task: str):
+        """Record exit code -1 for every task downstream of failed_task that has no result."""
+        seen: Set[str] = set()
+        pending = [failed_task]
+        while pending:
+            for child in self._task_nodes[pending.pop(0)].downstream:
+                if child in seen or child in self._results:
+                    continue
+                seen.add(child)
+                self._results[child] = TaskResult(name=child, exit_code=-1, output_dir='')
+                pending.append(child)
+
+    def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskResult:
+        task_spec = node.spec  # runs one task via "<docker_cmd> run"; `spec` itself is unused
+        task_dir = os.path.join(self._work_dir, node.name)
+        output_dir = os.path.join(task_dir, 'output')
+        files_dir = os.path.join(task_dir, 'files')
+        os.makedirs(files_dir, exist_ok=True)
+        # Tokens resolve to host paths; the mounts below reuse the same path in the container.
+        token_map = self._build_token_map(node, output_dir)
+        # Write inline files on the host so each one can be bind-mounted further down.
+        for file_spec in task_spec.files:
+            resolved_contents = self._substitute_tokens(file_spec.contents, token_map)
+            host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))  # relative to files_dir
+            os.makedirs(os.path.dirname(host_path), exist_ok=True)
+            with open(host_path, 'w') as f:
+                f.write(resolved_contents)
+
+        resolved_command = [self._substitute_tokens(c, token_map) for c in task_spec.command]
+        resolved_args = [self._substitute_tokens(a, token_map) for a in task_spec.args]
+        # Argument order: options (-e, -v) first, then the image, then command and args.
+        docker_args = [self._docker_cmd, 'run', '--rm']
+
+        for key, value in task_spec.environment.items():
+            resolved_value = self._substitute_tokens(value, token_map)
+            docker_args += ['-e', f'{key}={resolved_value}']
+        # The output directory is mounted writable; upstream outputs and files are read-only.
+        docker_args += ['-v', f'{output_dir}:{output_dir}']
+
+        for index, input_source in enumerate(task_spec.inputs):
+            if isinstance(input_source, task_module.TaskInputOutput):
+                upstream_result = self._results[input_source.task]
+                input_mount = token_map.get(f'input:{index}', upstream_result.output_dir)
+                docker_args += ['-v', f'{upstream_result.output_dir}:{input_mount}:ro']
+
+        for file_spec in task_spec.files:
+            host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))
+            docker_args += ['-v', f'{host_path}:{file_spec.path}:ro']
+
+        docker_args.append(task_spec.image)
+        docker_args += resolved_command + resolved_args
+
+        logger.debug('Docker command: %s', ' '.join(docker_args))
+        # Output is not captured, so the container writes straight to this process's stdio.
+        try:
+            process = subprocess.run(docker_args, capture_output=False)
+            return TaskResult(name=node.name, exit_code=process.returncode, output_dir=output_dir)
+        except FileNotFoundError:
+            logger.error('Docker not found. Is Docker installed and in your PATH?')
+            return TaskResult(name=node.name, exit_code=127, output_dir=output_dir)
+
+    def _build_token_map(self, node: TaskNode, output_dir: str) -> Dict[str, str]:
+        """Map token names ('output', 'input:<task>', 'input:<index>') to host directories."""
+        token_map: Dict[str, str] = {'output': output_dir}
+        for position, source in enumerate(node.spec.inputs):
+            if not isinstance(source, task_module.TaskInputOutput):
+                continue  # only task inputs get tokens; the index still counts every input
+            upstream_dir = self._results[source.task].output_dir
+            token_map[f'input:{source.task}'] = upstream_dir
+            token_map[f'input:{position}'] = upstream_dir
+        return token_map
+
+    def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
+        for key, value in tokens.items():  # callable repl: backslashes in value stay literal
+            text = re.sub(r'\{\{\s*' + re.escape(key) + r'\s*\}\}', lambda _m, v=value: v, text)
+        return text
+
+
+def run_workflow_locally(spec_path: str, work_dir: str | None = None,
+                         keep_work_dir: bool = False, docker_cmd: str = 'docker') -> bool:
+    """Execute the spec file locally; return True on success, raise ValueError for Jinja specs."""
+    with open(spec_path) as f:
+        spec_text = f.read()
+
+    # Jinja statements open with '{%' and comments with '{#'.
+    template_markers = ('{%', '{#', 'default-values')
+    if any(marker in spec_text for marker in template_markers):
+        raise ValueError(
+            'This spec uses Jinja templates which require server-side expansion.\n'
+            'Run "osmo workflow submit --dry-run -f <spec>" first to get the expanded spec,\n'
+            'then save that output and run it locally.')
+
+    if work_dir is None:  # created only now, so a rejected spec leaves no temp directory
+        work_dir = tempfile.mkdtemp(prefix='osmo-local-')
+        logger.info('Using temporary work directory: %s', work_dir)
+
+    executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir, docker_cmd=docker_cmd)
+    success = executor.execute(executor.load_spec(spec_text))
+
+    if not keep_work_dir and success:
+        logger.info('Cleaning up work directory: %s', work_dir)
+        shutil.rmtree(work_dir, ignore_errors=True)
+    elif not success:
+        logger.info('Work directory preserved for debugging: %s', work_dir)
+    return success
diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD
index 78372b738..59050591c 100644
--- a/src/utils/tests/BUILD
+++ b/src/utils/tests/BUILD
@@ -54,3 +54,13 @@ osmo_py_test(
         requirement("truststore"),
     ]
 )
+
+py_test(  # NOTE(review): the sibling target above uses osmo_py_test -- confirm py_test is intended
+    name = "test_local_executor",
+    srcs = ["test_local_executor.py"],
+    deps = [
+        "//src/utils:local_executor",
+    ],
+    local = True,
+    tags = ["manual"],
+)
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
new file mode 100644
index 000000000..0da9ba993
--- /dev/null
+++ b/src/utils/tests/test_local_executor.py
@@ -0,0 +1,1406 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import os
+import shutil
+import subprocess
+import tempfile
+import textwrap
+import unittest
+from unittest import mock
+
+from src.utils.job import task as task_module
+from src.utils.local_executor import LocalExecutor, TaskNode, TaskResult, run_workflow_locally
+
+
+# ---------------------------------------------------------------------------
+# Helper: detect Docker availability once for the entire module
+# ---------------------------------------------------------------------------
+def _docker_available() -> bool:
+    """Return True when ``docker info`` exits with status 0 within 10 seconds."""
+    try:
+        result = subprocess.run(
+            ['docker', 'info'], capture_output=True, timeout=10, check=False)
+    except (OSError, subprocess.TimeoutExpired):
+        # OSError also covers a docker binary that exists but cannot be executed; this
+        # runs at import time, so an uncaught error would abort loading of the module.
+        return False
+    return result.returncode == 0
+
+
+DOCKER_AVAILABLE = _docker_available()
+SKIP_DOCKER_MSG = 'Docker is not available on this machine'
+
+
+# ============================================================================
+# Unit tests — no Docker required; exercise parsing, DAG, tokens, validation
+# ============================================================================
+class TestLoadSpec(unittest.TestCase):
+    """Verify that real OSMO YAML specs are parsed correctly via the existing Pydantic models."""
+
+    def test_single_task_spec(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: hello-osmo
+              tasks:
+              - name: hello
+                image: ubuntu:24.04
+                command: ["echo"]
+                args: ["Hello from OSMO!"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')  # load_spec never reads the work dir
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(spec.name, 'hello-osmo')
+        self.assertEqual(len(spec.tasks), 1)
+        self.assertEqual(spec.tasks[0].name, 'hello')
+        self.assertEqual(spec.tasks[0].image, 'ubuntu:24.04')
+
+    def test_serial_tasks_spec(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: serial-tasks
+              tasks:
+              - name: task1
+                image: ubuntu:22.04
+                command: [sh]
+                args: [/tmp/run.sh]
+                files:
+                - contents: |
+                    echo "Hello from task1"
+                    echo "data" > {{output}}/test.txt
+                  path: /tmp/run.sh
+              - name: task2
+                image: ubuntu:22.04
+                command: [sh]
+                args: [/tmp/run.sh]
+                files:
+                - contents: |
+                    cat {{input:0}}/test.txt
+                  path: /tmp/run.sh
+                inputs:
+                - task: task1
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(spec.name, 'serial-tasks')
+        self.assertEqual(len(spec.tasks), 2)
+        first_input = spec.tasks[1].inputs[0]
+        self.assertIsInstance(first_input, task_module.TaskInputOutput)
+        if isinstance(first_input, task_module.TaskInputOutput):  # presumably for type narrowing
+            self.assertEqual(first_input.task, 'task1')
+
+    def test_groups_spec(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: grouped
+              groups:
+              - name: first-group
+                tasks:
+                - name: leader
+                  lead: true
+                  image: ubuntu:24.04
+                  command: ["echo", "leader"]
+                - name: follower
+                  image: ubuntu:24.04
+                  command: ["echo", "follower"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(len(spec.groups), 1)
+        self.assertEqual(len(spec.groups[0].tasks), 2)
+        self.assertTrue(spec.groups[0].tasks[0].lead)
+
+    def test_versioned_spec(self):
+        spec_text = textwrap.dedent('''\
+            version: 2
+            workflow:
+              name: versioned
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(spec.name, 'versioned')
+
+    def test_invalid_version_rejected(self):
+        spec_text = textwrap.dedent('''\
+            version: 99
+            workflow:
+              name: bad-version
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        with self.assertRaises(Exception):  # NOTE(review): broad; pin the validation error type
+            executor.load_spec(spec_text)
+
+    def test_both_tasks_and_groups_rejected(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: invalid
+              tasks:
+              - name: t
+                image: alpine:3.18
+                command: ["echo"]
+              groups:
+              - name: g
+                tasks:
+                - name: t2
+                  image: alpine:3.18
+                  command: ["echo"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        with self.assertRaises(Exception):  # NOTE(review): broad; pin the validation error type
+            executor.load_spec(spec_text)
+
+    def test_empty_workflow_rejected(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: empty
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        with self.assertRaises(Exception):  # NOTE(review): broad; pin the validation error type
+            executor.load_spec(spec_text)
+
+    def test_resources_spec_parsed(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: with-resources
+              resources:
+                default:
+                  cpu: 2
+                  memory: 4Gi
+                  storage: 10Gi
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(spec.resources['default'].cpu, 2)
+        self.assertEqual(spec.resources['default'].memory, '4Gi')
+
+    def test_environment_parsed(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: env-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["printenv"]
+                environment:
+                  MY_VAR: hello
+                  ANOTHER: world
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        self.assertEqual(spec.tasks[0].environment['MY_VAR'], 'hello')
+        self.assertEqual(spec.tasks[0].environment['ANOTHER'], 'world')
+
+
+class TestBuildDag(unittest.TestCase):
+    """Verify DAG construction from task dependencies."""
+
+    def _make_executor(self) -> LocalExecutor:
+        return LocalExecutor(work_dir='/tmp/unused')  # _build_dag never touches the work dir
+
+    def test_no_dependencies(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: parallel
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo", "a"]
+              - name: b
+                image: alpine:3.18
+                command: ["echo", "b"]
+              - name: c
+                image: alpine:3.18
+                command: ["echo", "c"]
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)  # fills executor._task_nodes
+
+        self.assertEqual(len(executor._task_nodes), 3)
+        for node in executor._task_nodes.values():
+            self.assertEqual(len(node.upstream), 0)
+            self.assertEqual(len(node.downstream), 0)
+
+    def test_serial_chain(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: first
+                image: alpine:3.18
+                command: ["echo"]
+              - name: second
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: first
+              - name: third
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: second
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        # Each link must be recorded on both ends: downstream on the producer, upstream on the consumer.
+        self.assertEqual(executor._task_nodes['first'].upstream, set())
+        self.assertEqual(executor._task_nodes['first'].downstream, {'second'})
+        self.assertEqual(executor._task_nodes['second'].upstream, {'first'})
+        self.assertEqual(executor._task_nodes['second'].downstream, {'third'})
+        self.assertEqual(executor._task_nodes['third'].upstream, {'second'})
+        self.assertEqual(executor._task_nodes['third'].downstream, set())
+
+    def test_diamond_dependency(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: diamond
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["echo"]
+              - name: left
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: right
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: join
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: left
+                - task: right
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+
+        self.assertEqual(executor._task_nodes['root'].downstream, {'left', 'right'})
+        self.assertEqual(executor._task_nodes['join'].upstream, {'left', 'right'})
+
+    def test_unknown_dependency_raises(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: broken
+              tasks:
+              - name: task1
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: nonexistent
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor._build_dag(spec)
+        self.assertIn('nonexistent', str(context.exception))  # message names the missing task
+
+    def test_groups_with_cross_group_deps(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: cross-group
+              groups:
+              - name: fetch
+                tasks:
+                - name: download
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+              - name: process
+                tasks:
+                - name: transform
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                  inputs:
+                  - task: download
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        # Dependencies are tracked per task, so they may cross group boundaries.
+        self.assertEqual(executor._task_nodes['download'].downstream, {'transform'})
+        self.assertEqual(executor._task_nodes['transform'].upstream, {'download'})
+
+
+class TestFindReadyTasks(unittest.TestCase):
+    """Verify correct identification of tasks ready to execute."""
+
+    def test_all_root_tasks_ready(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: parallel
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+
+        ready = executor._find_ready_tasks()
+        self.assertEqual(set(ready), {'a', 'b'})
+
+    def test_dependent_not_ready_until_upstream_completes(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: first
+                image: alpine:3.18
+                command: ["echo"]
+              - name: second
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: first
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+
+        ready = executor._find_ready_tasks()
+        self.assertEqual(ready, ['first'])
+        # Simulate a successful run of "first" by recording its result directly.
+        executor._results['first'] = TaskResult(name='first', exit_code=0, output_dir='/tmp/out')
+        ready = executor._find_ready_tasks()
+        self.assertEqual(ready, ['second'])
+
+    def test_failed_upstream_blocks_downstream(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: first
+                image: alpine:3.18
+                command: ["echo"]
+              - name: second
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: first
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        # Simulate a failed run of "first"; "second" must then never become ready.
+        executor._results['first'] = TaskResult(name='first', exit_code=1, output_dir='/tmp/out')
+        ready = executor._find_ready_tasks()
+        self.assertEqual(ready, [])
+
+
+class TestCancelDownstream(unittest.TestCase):
+    """Check that _cancel_downstream() records results for tasks behind a failed one."""
+
+    def test_cascading_cancel(self):
+        """After 'a' fails in the chain a -> b -> c, both b and c get exit code -1."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: chain
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: a
+              - name: c
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: b
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor._build_dag(executor.load_spec(workflow_yaml))
+
+        executor._results['a'] = TaskResult(name='a', exit_code=1, output_dir='/tmp')
+        executor._cancel_downstream('a')
+
+        for cancelled in ('b', 'c'):
+            self.assertIn(cancelled, executor._results)
+            self.assertEqual(executor._results[cancelled].exit_code, -1)
+
+
+class TestSubstituteTokens(unittest.TestCase):
+    """Check {{token}} replacement performed by _substitute_tokens()."""
+
+    def setUp(self):
+        self.executor = LocalExecutor(work_dir='/tmp/unused')
+
+    def test_output_token(self):
+        mapping = {'output': '/work/task1/output'}
+        rendered = self.executor._substitute_tokens('echo data > {{output}}/file.txt', mapping)
+        self.assertEqual(rendered, 'echo data > /work/task1/output/file.txt')
+
+    def test_input_by_index(self):
+        mapping = {'input:0': '/work/upstream/output'}
+        rendered = self.executor._substitute_tokens('cat {{input:0}}/data.csv', mapping)
+        self.assertEqual(rendered, 'cat /work/upstream/output/data.csv')
+
+    def test_input_by_name(self):
+        """A token keyed by task name is replaced, here written with inner spaces."""
+        mapping = {'input:task1': '/work/task1/output'}
+        rendered = self.executor._substitute_tokens('cat {{ input:task1 }}/data.csv', mapping)
+        self.assertEqual(rendered, 'cat /work/task1/output/data.csv')
+
+    def test_whitespace_around_tokens(self):
+        mapping = {'output': '/out'}
+        rendered = self.executor._substitute_tokens('{{ output }}/file.txt', mapping)
+        self.assertEqual(rendered, '/out/file.txt')
+
+    def test_multiple_tokens_in_one_string(self):
+        mapping = {'output': '/out', 'input:0': '/in0'}
+        rendered = self.executor._substitute_tokens('cp {{input:0}}/src {{output}}/dst', mapping)
+        self.assertEqual(rendered, 'cp /in0/src /out/dst')
+
+    def test_no_tokens_unchanged(self):
+        """Text without any placeholder comes back as-is, even with an empty token map."""
+        rendered = self.executor._substitute_tokens('plain text no tokens', {})
+        self.assertEqual(rendered, 'plain text no tokens')
+
+
+class TestBuildTokenMap(unittest.TestCase):
+    """Check the token dictionary that _build_token_map() produces for a task node."""
+
+    def test_output_only(self):
+        """A task without inputs gets exactly one token: 'output'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: simple
+              tasks:
+              - name: task1
+                image: alpine:3.18
+                command: ["echo"]
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/work')
+        executor._build_dag(executor.load_spec(workflow_yaml))
+
+        tokens = executor._build_token_map(executor._task_nodes['task1'], '/tmp/work/task1/output')
+        self.assertEqual(tokens['output'], '/tmp/work/task1/output')
+        self.assertEqual(len(tokens), 1)
+
+    def test_with_upstream_inputs(self):
+        """An upstream result is exposed both as 'input:0' and as 'input:<task name>'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["echo"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: producer
+        ''')
+        executor = LocalExecutor(work_dir='/tmp/work')
+        executor._build_dag(executor.load_spec(workflow_yaml))
+
+        executor._results['producer'] = TaskResult(
+            name='producer', exit_code=0, output_dir='/tmp/work/producer/output')
+
+        consumer_node = executor._task_nodes['consumer']
+        tokens = executor._build_token_map(consumer_node, '/tmp/work/consumer/output')
+
+        self.assertEqual(tokens['output'], '/tmp/work/consumer/output')
+        self.assertEqual(tokens['input:0'], '/tmp/work/producer/output')
+        self.assertEqual(tokens['input:producer'], '/tmp/work/producer/output')
+
+
+class TestValidateForLocal(unittest.TestCase):
+    """Check that _validate_for_local() accepts supported specs and raises ValueError otherwise."""
+
+    def _make_executor(self) -> LocalExecutor:
+        """Build a LocalExecutor with work_dir='/tmp/unused'."""
+        return LocalExecutor(work_dir='/tmp/unused')
+
+    def _prepare(self, spec_text: str):
+        """Load spec_text, build its DAG and return the (executor, spec) pair."""
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        return executor, spec
+
+    def test_simple_spec_passes(self):
+        """A single image/command task passes validation without raising."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: ok
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        executor._validate_for_local(spec)
+
+    def test_dataset_input_rejected(self):
+        """A dataset input raises ValueError whose message contains 'dataset'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                inputs:
+                - dataset:
+                    name: my_dataset
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        with self.assertRaises(ValueError) as raised:
+            executor._validate_for_local(spec)
+        self.assertIn('dataset', str(raised.exception))
+
+    def test_url_input_rejected(self):
+        """A URL input raises ValueError whose message contains 'URL'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                inputs:
+                - url: s3://my-bucket/data/
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        with self.assertRaises(ValueError) as raised:
+            executor._validate_for_local(spec)
+        self.assertIn('URL', str(raised.exception))
+
+    def test_dataset_output_rejected(self):
+        """A dataset output raises ValueError mentioning 'dataset' (case-insensitive)."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                outputs:
+                - dataset:
+                    name: my_dataset
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        with self.assertRaises(ValueError) as raised:
+            executor._validate_for_local(spec)
+        self.assertIn('dataset', str(raised.exception).lower())
+
+    def test_url_output_rejected(self):
+        """A URL output raises ValueError mentioning 'object storage' (case-insensitive)."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                outputs:
+                - url: s3://my-bucket/models/
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        with self.assertRaises(ValueError) as raised:
+            executor._validate_for_local(spec)
+        self.assertIn('object storage', str(raised.exception).lower())
+
+    def test_multiple_unsupported_features_all_reported(self):
+        """Problems in two different tasks are both named in a single ValueError."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task1
+                image: ubuntu:24.04
+                command: ["echo"]
+                inputs:
+                - url: s3://bucket/data/
+              - name: task2
+                image: ubuntu:24.04
+                command: ["echo"]
+                inputs:
+                - dataset:
+                    name: ds
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        with self.assertRaises(ValueError) as raised:
+            executor._validate_for_local(spec)
+        message = str(raised.exception)
+        self.assertIn('task1', message)
+        self.assertIn('task2', message)
+
+    def test_task_deps_only_passes(self):
+        """Inputs that only reference other tasks pass validation."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: ok
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["echo"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: producer
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        executor._validate_for_local(spec)
+
+    def test_files_and_env_pass(self):
+        """Inline files and environment variables pass validation."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: ok
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                environment:
+                  MY_VAR: hello
+                files:
+                - contents: echo hi
+                  path: /tmp/run.sh
+        ''')
+        executor, spec = self._prepare(workflow_yaml)
+        executor._validate_for_local(spec)
+
+
+class TestJinjaTemplateDetection(unittest.TestCase):
+    """run_workflow_locally() must reject templated specs with a ValueError naming 'Jinja'."""
+
+    def _write_temp_spec(self, content: str) -> str:
+        """Write content to a new .yaml temp file and return its path; the caller removes it."""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as handle:
+            handle.write(content)
+        return handle.name
+
+    def test_jinja_block_detected(self):
+        """A {% ... %} statement block in the spec is reported as Jinja."""
+        path = self._write_temp_spec(textwrap.dedent('''\
+            workflow:
+              name: {% if true %}test{% endif %}
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        self.addCleanup(os.unlink, path)
+
+        with self.assertRaises(ValueError) as context:
+            run_workflow_locally(path)
+        self.assertIn('Jinja', str(context.exception))
+
+    def test_jinja_comment_detected(self):
+        """A {# ... #} comment in the spec is reported as Jinja."""
+        path = self._write_temp_spec(textwrap.dedent('''\
+            {# A comment #}
+            workflow:
+              name: test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        self.addCleanup(os.unlink, path)
+
+        with self.assertRaises(ValueError) as context:
+            run_workflow_locally(path)
+        self.assertIn('Jinja', str(context.exception))
+
+    def test_default_values_section_detected(self):
+        """A default-values section with a {{variable}} reference is reported as Jinja."""
+        path = self._write_temp_spec(textwrap.dedent('''\
+            workflow:
+              name: "{{experiment_name}}"
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+            default-values:
+              experiment_name: my-experiment
+        '''))
+        self.addCleanup(os.unlink, path)
+
+        with self.assertRaises(ValueError) as context:
+            run_workflow_locally(path)
+        self.assertIn('Jinja', str(context.exception))
+
+
+# ============================================================================
+# Integration tests — require Docker; test actual container execution
+# ============================================================================
+@unittest.skipUnless(DOCKER_AVAILABLE, SKIP_DOCKER_MSG)
+class TestDockerExecution(unittest.TestCase):
+    """
+    Docker-backed integration tests: each one hands an OSMO workflow spec, of the kind
+    normally submitted to a Kubernetes cluster, to the local executor and checks the result.
+    """
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-test-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def _execute_spec(self, spec_text: str) -> bool:
+        """Load spec_text into a fresh executor rooted at self.work_dir and execute it."""
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        return executor.execute(executor.load_spec(spec_text))
+
+    def _output_path(self, task_name: str, filename: str) -> str:
+        """Host-side path of filename inside the given task's output directory."""
+        return os.path.join(self.work_dir, task_name, 'output', filename)
+
+    # ---- Single task tests ----
+
+    def test_hello_world(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: hello-osmo
+              tasks:
+              - name: hello
+                image: alpine:3.18
+                command: ["echo", "Hello from OSMO!"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    def test_single_task_with_args(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: args-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+                args: ["argument1", "argument2"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    def test_task_failure_returns_false(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: will-fail
+              tasks:
+              - name: failing-task
+                image: alpine:3.18
+                command: ["sh", "-c", "exit 42"]
+        ''')
+        self.assertFalse(self._execute_spec(workflow_yaml))
+
+    # ---- Environment variable tests ----
+
+    def test_environment_variables(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: env-test
+              tasks:
+              - name: check-env
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["test \\"$MY_VAR\\" = \\"hello_world\\" && test \\"$SECOND\\" = \\"42\\""]
+                environment:
+                  MY_VAR: hello_world
+                  SECOND: "42"
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    # ---- Files mount tests ----
+
+    def test_inline_file_mounted(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: files-test
+              tasks:
+              - name: check-file
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: |
+                    echo "script ran successfully"
+                  path: /tmp/run.sh
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    def test_multiple_files_mounted(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: multi-files
+              tasks:
+              - name: check-files
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat /tmp/config.txt && sh /scripts/run.sh"]
+                files:
+                - contents: "key=value"
+                  path: /tmp/config.txt
+                - contents: |
+                    echo "second script ok"
+                  path: /scripts/run.sh
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    # ---- Data output tests ----
+
+    def test_output_directory_writable(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: output-test
+              tasks:
+              - name: write-output
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'payload' > {{output}}/result.txt"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        result_path = self._output_path('write-output', 'result.txt')
+        self.assertTrue(os.path.exists(result_path))
+        with open(result_path) as handle:
+            self.assertEqual(handle.read().strip(), 'payload')
+
+    # ---- Serial data flow tests ----
+
+    def test_serial_data_flow_two_tasks(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: serial-data
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'from_producer' > {{output}}/data.txt"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/data.txt > {{output}}/received.txt"]
+                inputs:
+                - task: producer
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        received_path = self._output_path('consumer', 'received.txt')
+        self.assertTrue(os.path.exists(received_path))
+        with open(received_path) as handle:
+            self.assertEqual(handle.read().strip(), 'from_producer')
+
+    def test_serial_chain_three_tasks(self):
+        """Mimics cookbook/tutorials/serial_workflow.yaml"""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: serial-chain
+              tasks:
+              - name: task1
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'task1_data' > {{output}}/result.txt"]
+
+              - name: task2
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args:
+                - |
+                  cat {{input:0}}/result.txt > {{output}}/result.txt
+                  echo '_plus_task2' >> {{output}}/result.txt
+                inputs:
+                - task: task1
+
+              - name: task3
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args:
+                - |
+                  cat {{input:0}}/result.txt > {{output}}/final.txt
+                  cat {{input:1}}/result.txt >> {{output}}/final.txt
+                inputs:
+                - task: task1
+                - task: task2
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('task3', 'final.txt')) as handle:
+            content = handle.read()
+        self.assertIn('task1_data', content)
+        self.assertIn('_plus_task2', content)
+
+    # ---- Parallel execution tests ----
+
+    def test_parallel_independent_tasks(self):
+        """Mimics cookbook/tutorials/parallel_tasks.yaml"""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: parallel-tasks
+              tasks:
+              - name: task-a
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'a' > {{output}}/marker.txt"]
+              - name: task-b
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'b' > {{output}}/marker.txt"]
+              - name: task-c
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'c' > {{output}}/marker.txt"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        for task_name, expected in [('task-a', 'a'), ('task-b', 'b'), ('task-c', 'c')]:
+            with open(self._output_path(task_name, 'marker.txt')) as handle:
+                self.assertEqual(handle.read().strip(), expected)
+
+    # ---- Diamond DAG tests ----
+
+    def test_diamond_dag(self):
+        """root feeds left and right; join concatenates both branch results."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: diamond
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'root_data' > {{output}}/base.txt"]
+              - name: left
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'left:' > {{output}}/result.txt && cat {{input:0}}/base.txt >> {{output}}/result.txt"]
+                inputs:
+                - task: root
+              - name: right
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'right:' > {{output}}/result.txt && cat {{input:0}}/base.txt >> {{output}}/result.txt"]
+                inputs:
+                - task: root
+              - name: join
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/result.txt > {{output}}/final.txt && cat {{input:1}}/result.txt >> {{output}}/final.txt"]
+                inputs:
+                - task: left
+                - task: right
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('join', 'final.txt')) as handle:
+            content = handle.read()
+        self.assertIn('left:', content)
+        self.assertIn('right:', content)
+        self.assertIn('root_data', content)
+
+    # ---- Failure propagation tests ----
+
+    def test_failure_cancels_downstream(self):
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: fail-chain
+              tasks:
+              - name: failing
+                image: alpine:3.18
+                command: ["sh", "-c", "exit 1"]
+              - name: should-not-run
+                image: alpine:3.18
+                command: ["sh", "-c", "echo 'oops' > {{output}}/should_not_exist.txt"]
+                inputs:
+                - task: failing
+        ''')
+        self.assertFalse(self._execute_spec(workflow_yaml))
+        leftover = self._output_path('should-not-run', 'should_not_exist.txt')
+        self.assertFalse(os.path.exists(leftover))
+
+    def test_parallel_failure_does_not_affect_independent_branch(self):
+        """Only the overall failure is asserted; the ok-branch output is not inspected."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: partial-fail
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo ok > {{output}}/data.txt"]
+              - name: fail-branch
+                image: alpine:3.18
+                command: ["sh", "-c", "exit 1"]
+                inputs:
+                - task: root
+              - name: ok-branch
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/data.txt > {{output}}/received.txt"]
+                inputs:
+                - task: root
+        ''')
+        # The executor should stop on first failure, so the overall result is False.
+        # root succeeds, then one of the branches fails.
+        self.assertFalse(self._execute_spec(workflow_yaml))
+
+    # ---- Groups (ganged tasks) tests ----
+
+    def test_group_with_single_task(self):
+        """A task declared under groups: writes to an output dir named after the task."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: single-group
+              groups:
+              - name: my-group
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c"]
+                  args: ["echo 'group_ok' > {{output}}/marker.txt"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('leader', 'marker.txt')) as handle:
+            self.assertEqual(handle.read().strip(), 'group_ok')
+
+    def test_groups_with_data_flow(self):
+        """Mimics cookbook/tutorials/combination_workflow_simple.yaml structure."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: data-pipeline
+              groups:
+              - name: prepare-data
+                tasks:
+                - name: generate-dataset
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c"]
+                  args:
+                  - |
+                    mkdir -p {{output}}/data
+                    for i in 1 2 3; do echo "sample_$i" >> {{output}}/data/dataset.csv; done
+              - name: train-models
+                tasks:
+                - name: train-model
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c"]
+                  args:
+                  - |
+                    wc -l {{input:0}}/data/dataset.csv > {{output}}/line_count.txt
+                  inputs:
+                  - task: generate-dataset
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('train-model', 'line_count.txt')) as handle:
+            content = handle.read()
+        self.assertIn('3', content)
+
+    # ---- Input by task name tests ----
+
+    def test_input_by_task_name(self):
+        """The consumer reads its upstream through {{input:producer}} rather than an index."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: named-input
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'named_data' > {{output}}/out.txt"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:producer}}/out.txt > {{output}}/received.txt"]
+                inputs:
+                - task: producer
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('consumer', 'received.txt')) as handle:
+            self.assertEqual(handle.read().strip(), 'named_data')
+
+    # ---- Files with token substitution ----
+
+    def test_file_contents_with_token_substitution(self):
+        """Mimics cookbook/tutorials/serial_workflow.yaml pattern of inline scripts with tokens."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: file-tokens
+              tasks:
+              - name: writer
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: |
+                    echo "writing output"
+                    echo "file_data" > {{output}}/result.txt
+                  path: /tmp/run.sh
+              - name: reader
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: |
+                    cat {{input:0}}/result.txt > {{output}}/received.txt
+                  path: /tmp/run.sh
+                inputs:
+                - task: writer
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+        with open(self._output_path('reader', 'received.txt')) as handle:
+            self.assertEqual(handle.read().strip(), 'file_data')
+
+    # ---- Resource spec ignored gracefully ----
+
+    def test_resources_ignored_gracefully(self):
+        """Resource specs are K8s-specific; local executor should accept and ignore them."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: with-resources
+              resources:
+                default:
+                  cpu: 2
+                  memory: 4Gi
+                  storage: 10Gi
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        self.assertTrue(self._execute_spec(workflow_yaml))
+
+    # ---- Docker-not-found handling ----
+
+    def test_docker_not_found_graceful_failure(self):
+        """execute() is expected to return False when the runtime binary does not exist."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: no-docker
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(
+            work_dir=self.work_dir, keep_work_dir=True,
+            docker_cmd='nonexistent-docker-binary-12345',
+        )
+        spec = executor.load_spec(workflow_yaml)
+        self.assertFalse(executor.execute(spec))
+
+    # ---- Alternative container runtime ----
+
+    def test_custom_docker_command(self):
+        """Passing docker_cmd='docker' explicitly still runs the task successfully."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: custom-cmd
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(
+            work_dir=self.work_dir, keep_work_dir=True,
+            docker_cmd='docker',
+        )
+        spec = executor.load_spec(workflow_yaml)
+        self.assertTrue(executor.execute(spec))
+
+
+# ============================================================================
+# Integration tests using actual cookbook spec files from the repo
+# ============================================================================
+@unittest.skipUnless(DOCKER_AVAILABLE, SKIP_DOCKER_MSG)
+class TestCookbookSpecs(unittest.TestCase):
+    """
+    Feed the cookbook tutorial YAML files, which were written for Kubernetes clusters,
+    to the local Docker executor and check the outcome of each one.
+    """
+
+    COOKBOOK_DIR = os.path.join(os.path.dirname(__file__), '..', '..', '..',
+                               'cookbook', 'tutorials')
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-cookbook-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def _run_cookbook_spec(self, filename: str) -> bool:
+        """
+        Run one cookbook tutorial spec through run_workflow_locally().
+        Skips the calling test when the file is not present under COOKBOOK_DIR.
+        """
+        path = os.path.join(self.COOKBOOK_DIR, filename)
+        if not os.path.exists(path):
+            self.skipTest(f'Cookbook file not found: {path}')
+        return run_workflow_locally(spec_path=path, work_dir=self.work_dir, keep_work_dir=True)
+
+    def test_hello_world_yaml(self):
+        self.assertTrue(self._run_cookbook_spec('hello_world.yaml'))
+
+    def test_parallel_tasks_yaml(self):
+        self.assertTrue(self._run_cookbook_spec('parallel_tasks.yaml'))
+
+    def test_serial_workflow_yaml(self):
+        self.assertTrue(self._run_cookbook_spec('serial_workflow.yaml'))
+
+    def test_resources_basic_yaml(self):
+        self.assertTrue(self._run_cookbook_spec('resources_basic.yaml'))
+
+    def test_combination_workflow_simple_yaml(self):
+        """
+        Always skipped: combination_workflow_simple.yaml has a 'sleep 120' in transform-a,
+        so running it would take 2+ minutes. A trimmed version of the same structure
+        is exercised by TestDockerExecution.test_groups_with_data_flow.
+        """
+        self.skipTest('Contains sleep 120; covered by test_groups_with_data_flow')
+
+    def test_unsupported_spec_data_download(self):
+        """data_download.yaml uses URL inputs, so it must be rejected with a 'URL' error."""
+        with self.assertRaises(ValueError) as raised:
+            self._run_cookbook_spec('data_download.yaml')
+        self.assertIn('URL', str(raised.exception))
+
+    def test_unsupported_spec_data_upload(self):
+        """data_upload.yaml uses URL outputs, so it must be rejected (object storage)."""
+        with self.assertRaises(ValueError) as raised:
+            self._run_cookbook_spec('data_upload.yaml')
+        self.assertIn('object storage', str(raised.exception).lower())
+
+    def test_unsupported_spec_dataset_upload(self):
+        """dataset_upload.yaml uses dataset outputs, so it must be rejected."""
+        with self.assertRaises(ValueError) as raised:
+            self._run_cookbook_spec('dataset_upload.yaml')
+        self.assertIn('dataset', str(raised.exception).lower())
+
+    def test_unsupported_spec_template(self):
+        """template_hello_world.yaml relies on default-values templating and must be rejected."""
+        template_path = os.path.join(self.COOKBOOK_DIR, 'template_hello_world.yaml')
+        if not os.path.exists(template_path):
+            self.skipTest('Cookbook file not found')
+        with self.assertRaises(ValueError) as raised:
+            run_workflow_locally(
+                spec_path=template_path,
+                work_dir=self.work_dir,
+                keep_work_dir=True,
+            )
+        self.assertIn('Jinja', str(raised.exception))
+
+
+# ============================================================================
+# run_workflow_locally() integration tests
+# ============================================================================
+@unittest.skipUnless(DOCKER_AVAILABLE, SKIP_DOCKER_MSG)
+class TestRunWorkflowLocally(unittest.TestCase):
+    """Test the top-level run_workflow_locally() convenience function."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-func-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_success_cleans_up_when_not_keeping(self):
+        work_dir = tempfile.mkdtemp(prefix='osmo-local-cleanup-')
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(textwrap.dedent('''\
+                workflow:
+                  name: cleanup-test
+                  tasks:
+                  - name: task
+                    image: alpine:3.18
+                    command: ["echo", "ok"]
+            '''))
+            spec_path = f.name
+        try:
+            result = run_workflow_locally(
+                spec_path=spec_path,
+                work_dir=work_dir,
+                keep_work_dir=False,
+            )
+            self.assertTrue(result)
+            self.assertFalse(os.path.exists(work_dir))
+        finally:
+            os.unlink(spec_path)
+            if os.path.exists(work_dir):
+                shutil.rmtree(work_dir, ignore_errors=True)
+
+    def test_failure_preserves_work_dir(self):
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(textwrap.dedent('''\
+                workflow:
+                  name: fail-test
+                  tasks:
+                  - name: task
+                    image: alpine:3.18
+                    command: ["sh", "-c", "exit 1"]
+            '''))
+            spec_path = f.name
+        try:
+            result = run_workflow_locally(
+                spec_path=spec_path,
+                work_dir=self.work_dir,
+                keep_work_dir=False,
+            )
+            self.assertFalse(result)
+            self.assertTrue(os.path.exists(self.work_dir))
+        finally:
+            os.unlink(spec_path)
+
+    def test_keep_flag_preserves_on_success(self):
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(textwrap.dedent('''\
+                workflow:
+                  name: keep-test
+                  tasks:
+                  - name: task
+                    image: alpine:3.18
+                    command: ["echo", "ok"]
+            '''))
+            spec_path = f.name
+        try:
+            result = run_workflow_locally(
+                spec_path=spec_path,
+                work_dir=self.work_dir,
+                keep_work_dir=True,
+            )
+            self.assertTrue(result)
+            self.assertTrue(os.path.exists(self.work_dir))
+        finally:
+            os.unlink(spec_path)
+
+    def test_nonexistent_file_raises(self):
+        with self.assertRaises(FileNotFoundError):
+            run_workflow_locally(spec_path='/nonexistent/path/spec.yaml')
+
+
+if __name__ == '__main__':
+    unittest.main()

From 668c0a941246fe46b6620daa0b292329a9f52da3 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 2 Apr 2026 17:08:17 -0700
Subject: [PATCH 02/35] Add local.py and update dependencies in BUILD file

---
 src/cli/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/cli/BUILD b/src/cli/BUILD
index 7a9b905ee..cdada591a 100755
--- a/src/cli/BUILD
+++ b/src/cli/BUILD
@@ -37,6 +37,7 @@ osmo_py_library(
         "dataset.py",
         "editor.py",
         "formatters.py",
+        "local.py",
         "login.py",
         "main_parser.py",
         "pool.py",
@@ -73,6 +74,7 @@ osmo_py_library(
         "//src/lib/utils:validation",
         "//src/lib/utils:version",
         "//src/lib/utils:workflow",
+        "//src/utils:local_executor",
     ],
 )
 

From 63b062016d189286672f3695edb6bc0e72b4dfd3 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 14:37:32 -0700
Subject: [PATCH 03/35] Add GPU passthrough support in LocalExecutor

- Implemented GPU resource handling in LocalExecutor to allow tasks to request GPU resources.
- Added a new method to determine the GPU count for tasks based on their resource specifications.
- Updated Docker run command to include GPU options when applicable.
---
 src/utils/local_executor.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index c1926db34..c7f0939ca 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -60,6 +60,7 @@ class LocalExecutor:
       - Inline `files:` written to the container
       - `environment:` passed as Docker env vars
       - Task-to-task data flow via shared local directories
+      - GPU passthrough via --gpus for tasks that declare gpu > 0 in resources
 
     Does NOT support (raises clear errors):
       - Dataset / URL inputs/outputs (require object storage)
@@ -204,6 +205,13 @@ def _cancel_downstream(self, failed_task: str):
                         name=downstream, exit_code=-1, output_dir='')
                     queue.append(downstream)
 
+    def _task_gpu_count(self, task_spec: task_module.TaskSpec,
+                        spec: workflow_module.WorkflowSpec) -> int:
+        resource_spec = spec.resources.get(task_spec.resource)
+        if resource_spec and resource_spec.gpu:
+            return resource_spec.gpu
+        return 0
+
     def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskResult:
         task_spec = node.spec
         task_dir = os.path.join(self._work_dir, node.name)
@@ -225,6 +233,11 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
 
         docker_args = [self._docker_cmd, 'run', '--rm']
 
+        gpu_count = self._task_gpu_count(task_spec, spec)
+        if gpu_count > 0:
+            docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
+            logger.info('Task "%s" requesting %d GPU(s)', node.name, gpu_count)
+
         for key, value in task_spec.environment.items():
             resolved_value = self._substitute_tokens(value, token_map)
             docker_args += ['-e', f'{key}={resolved_value}']

From b20f2b9650d8ca5659e5348a3172e256d90555aa Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 14:59:51 -0700
Subject: [PATCH 04/35] Update Docker command construction in LocalExecutor

- Adjusted the handling of the resolved_command to correctly set the entrypoint and append arguments.
- Ensured that the first element of resolved_command is used as the entrypoint while the rest are appended to the Docker command.
---
 src/utils/local_executor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index c7f0939ca..321a2bc9e 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -254,8 +254,10 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))
             docker_args += ['-v', f'{host_path}:{file_spec.path}:ro']
 
+        if resolved_command:
+            docker_args += ['--entrypoint', resolved_command[0]]
         docker_args.append(task_spec.image)
-        docker_args += resolved_command + resolved_args
+        docker_args += resolved_command[1:] + resolved_args
 
         logger.debug('Docker command: %s', ' '.join(docker_args))
 

From bcf4246ba80ba2de427536723fcfeccf6421cb38 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 16:25:26 -0700
Subject: [PATCH 05/35] Add resume functionality to local workflow execution

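- Introduced `--resume` and `--from-step` options in the CLI to allow resuming previous runs; both require `--work-dir` to point at the previous run directory.
- Implemented state management in `LocalExecutor`: task results are saved to `.osmo-state.json` in the work directory after each task and restored when resuming.
- Enhanced logging to provide feedback on skipped tasks and remaining tasks during resumption.
- Added GPU detection via `nvidia-smi`: a task that requests more GPUs than are available runs with the available GPUs, or without GPU support when none are detected.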
---
 src/cli/local.py            |  15 +++++
 src/utils/local_executor.py | 128 ++++++++++++++++++++++++++++++++++--
 2 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/src/cli/local.py b/src/cli/local.py
index a9f12eaaf..b1d3be72e 100644
--- a/src/cli/local.py
+++ b/src/cli/local.py
@@ -54,6 +54,19 @@ def setup_parser(parser: argparse._SubParsersAction):
         dest='docker_cmd',
         default='docker',
         help='Docker-compatible command to use (e.g. podman). Default: docker.')
+    run_parser.add_argument(
+        '--resume',
+        action='store_true',
+        default=False,
+        help='Resume a previous run, skipping tasks that already completed successfully. '
+             'Requires --work-dir pointing to the previous run directory.')
+    run_parser.add_argument(
+        '--from-step',
+        dest='from_step',
+        default=None,
+        help='Resume from a specific task, re-running it and all downstream tasks. '
+             'Tasks upstream of the specified step are skipped if they completed '
+             'successfully. Requires --work-dir pointing to the previous run directory.')
     run_parser.set_defaults(func=_run_local)
 
 
@@ -63,6 +76,8 @@ def _run_local(service_client, args: argparse.Namespace):
             spec_path=args.workflow_file,
             work_dir=args.work_dir,
             keep_work_dir=args.keep,
+            resume=args.resume,
+            from_step=args.from_step,
         )
     except ValueError as error:
         print(f'Error: {error}', file=sys.stderr)
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 321a2bc9e..52da92d98 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -17,6 +17,7 @@
 """
 
 import dataclasses
+import json
 import logging
 import os
 import re
@@ -33,6 +34,8 @@
 
 logger = logging.getLogger(__name__)
 
+STATE_FILE_NAME = '.osmo-state.json'
+
 
 @dataclasses.dataclass
 class TaskNode:
@@ -74,19 +77,53 @@ def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str =
         self._docker_cmd = docker_cmd
         self._task_nodes: Dict[str, TaskNode] = {}
         self._results: Dict[str, TaskResult] = {}
+        self._available_gpus: int | None = None
+
+    def _detect_available_gpus(self) -> int:
+        if self._available_gpus is not None:
+            return self._available_gpus
+        try:
+            result = subprocess.run(
+                ['nvidia-smi', '--query-gpu=index', '--format=csv,noheader'],
+                capture_output=True, text=True, timeout=10,
+            )
+            if result.returncode == 0:
+                gpu_indices = [line.strip() for line in result.stdout.strip().splitlines() if line.strip()]
+                self._available_gpus = len(gpu_indices)
+            else:
+                logger.warning('nvidia-smi failed (exit %d) — assuming 0 GPUs available', result.returncode)
+                self._available_gpus = 0
+        except FileNotFoundError:
+            logger.warning('nvidia-smi not found — assuming 0 GPUs available')
+            self._available_gpus = 0
+        except subprocess.TimeoutExpired:
+            logger.warning('nvidia-smi timed out — assuming 0 GPUs available')
+            self._available_gpus = 0
+        return self._available_gpus
 
     def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
         raw = yaml.safe_load(spec_text)
         versioned = workflow_module.VersionedWorkflowSpec(**raw)
         return versioned.workflow
 
-    def execute(self, spec: workflow_module.WorkflowSpec) -> bool:
+    def execute(self, spec: workflow_module.WorkflowSpec,
+                resume: bool = False, from_step: str | None = None) -> bool:
         self._build_dag(spec)
         self._validate_for_local(spec)
         self._setup_directories()
 
-        logger.info('Workflow "%s": %d task(s) across %d group(s)',
-                     spec.name, sum(len(g.tasks) for g in self._groups(spec)), len(self._groups(spec)))
+        if resume or from_step:
+            self._restore_completed_tasks(from_step)
+
+        total_tasks = sum(len(g.tasks) for g in self._groups(spec))
+        skipped = len(self._results)
+        remaining = total_tasks - skipped
+        if skipped > 0:
+            logger.info('Workflow "%s": resuming — %d task(s) skipped, %d remaining',
+                         spec.name, skipped, remaining)
+        else:
+            logger.info('Workflow "%s": %d task(s) across %d group(s)',
+                         spec.name, total_tasks, len(self._groups(spec)))
 
         ready = self._find_ready_tasks()
         while ready:
@@ -95,6 +132,7 @@ def execute(self, spec: workflow_module.WorkflowSpec) -> bool:
                 logger.info('--- Running task: %s (image: %s) ---', task_name, node.spec.image)
                 result = self._run_task(node, spec)
                 self._results[task_name] = result
+                self._save_state()
 
                 if result.exit_code != 0:
                     logger.error('Task "%s" failed with exit code %d', task_name, result.exit_code)
@@ -113,6 +151,64 @@ def execute(self, spec: workflow_module.WorkflowSpec) -> bool:
         logger.info('Workflow "%s" completed successfully', spec.name)
         return True
 
+    @property
+    def _state_file_path(self) -> str:
+        return os.path.join(self._work_dir, STATE_FILE_NAME)
+
+    def _save_state(self):
+        state = {
+            'tasks': {
+                name: {'exit_code': result.exit_code, 'output_dir': result.output_dir}
+                for name, result in self._results.items()
+                if result.exit_code != -1
+            }
+        }
+        with open(self._state_file_path, 'w') as f:
+            json.dump(state, f, indent=2)
+
+    def _load_state(self) -> Dict | None:
+        if not os.path.exists(self._state_file_path):
+            return None
+        with open(self._state_file_path) as f:
+            return json.load(f)
+
+    def _restore_completed_tasks(self, from_step: str | None = None):
+        state = self._load_state()
+        if state is None:
+            logger.info('No previous state found — starting from scratch')
+            return
+
+        completed: Dict[str, Dict] = {}
+        for name, info in state.get('tasks', {}).items():
+            if name not in self._task_nodes:
+                continue
+            if info['exit_code'] == 0 and os.path.isdir(info['output_dir']):
+                completed[name] = info
+
+        if from_step:
+            if from_step not in self._task_nodes:
+                raise ValueError(f'Task "{from_step}" not found in workflow')
+            to_invalidate = self._get_downstream_tasks(from_step)
+            to_invalidate.add(from_step)
+            for name in to_invalidate:
+                completed.pop(name, None)
+
+        for name, info in completed.items():
+            self._results[name] = TaskResult(
+                name=name, exit_code=0, output_dir=info['output_dir'])
+            logger.info('Resuming: skipping completed task "%s"', name)
+
+    def _get_downstream_tasks(self, task_name: str) -> Set[str]:
+        visited: Set[str] = set()
+        queue = [task_name]
+        while queue:
+            current = queue.pop(0)
+            for downstream in self._task_nodes[current].downstream:
+                if downstream not in visited:
+                    visited.add(downstream)
+                    queue.append(downstream)
+        return visited
+
     def _groups(self, spec: workflow_module.WorkflowSpec) -> List[task_module.TaskGroupSpec]:
         if spec.groups:
             return spec.groups
@@ -235,8 +331,19 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
 
         gpu_count = self._task_gpu_count(task_spec, spec)
         if gpu_count > 0:
-            docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
-            logger.info('Task "%s" requesting %d GPU(s)', node.name, gpu_count)
+            available = self._detect_available_gpus()
+            if available == 0:
+                logger.warning(
+                    'Task "%s" requests %d GPU(s) but no GPUs are available — running without GPU support',
+                    node.name, gpu_count)
+            elif gpu_count > available:
+                logger.warning(
+                    'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
+                    node.name, gpu_count, available, available)
+                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(available))}"']
+            else:
+                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
+            logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
         for key, value in task_spec.environment.items():
             resolved_value = self._substitute_tokens(value, token_map)
@@ -286,7 +393,13 @@ def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
 
 
 def run_workflow_locally(spec_path: str, work_dir: str | None = None,
-                         keep_work_dir: bool = False) -> bool:
+                         keep_work_dir: bool = False,
+                         resume: bool = False,
+                         from_step: str | None = None) -> bool:
+    if (resume or from_step) and work_dir is None:
+        raise ValueError(
+            '--resume and --from-step require --work-dir pointing to a previous run directory.')
+
     if work_dir is None:
         work_dir = tempfile.mkdtemp(prefix='osmo-local-')
         logger.info('Using temporary work directory: %s', work_dir)
@@ -303,7 +416,8 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
 
     executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir)
     spec = executor.load_spec(spec_text)
-    success = executor.execute(spec)
+    success = executor.execute(spec, resume=resume or from_step is not None,
+                               from_step=from_step)
 
     if not keep_work_dir and success:
         logger.info('Cleaning up work directory: %s', work_dir)

From 3e4f7c3a5afd53219ac8df227067b172b0aaa4ff Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 16:25:44 -0700
Subject: [PATCH 06/35] Update .gitignore to ignore the .venv directory

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2fe57a694..b84388d41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,5 @@ docs/**/domain_config.js
 .ruff_cache
 
 .lycheecache
+
+.venv/
\ No newline at end of file

From 1aa8c307e88f3c06b42a0d729b737428fff0ea2e Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 16:42:15 -0700
Subject: [PATCH 07/35] Enhance local workflow execution with Docker command
 support

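- Added `docker_cmd` parameter to `run_workflow_locally` and passed the CLI's `args.docker_cmd` value through to `LocalExecutor`.
- Improved logging to redact sensitive information: values of `-e KEY=VALUE` environment arguments are replaced with `REDACTED` in the debug-logged Docker command.
- Implemented error handling for unexecuted tasks in `LocalExecutor` to detect potential workflow stalls: tasks that were never scheduled (e.g. because of a dependency cycle) are logged and the workflow returns failure.
- Removed the `manual` tag from the test target in `src/utils/tests/BUILD`, so it is no longer excluded from wildcard target patterns.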
---
 src/cli/local.py            |  1 +
 src/utils/local_executor.py | 26 +++++++++++++++++++++++---
 src/utils/tests/BUILD       |  1 -
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/cli/local.py b/src/cli/local.py
index b1d3be72e..a481c4c56 100644
--- a/src/cli/local.py
+++ b/src/cli/local.py
@@ -78,6 +78,7 @@ def _run_local(service_client, args: argparse.Namespace):
             keep_work_dir=args.keep,
             resume=args.resume,
             from_step=args.from_step,
+            docker_cmd=args.docker_cmd,
         )
     except ValueError as error:
         print(f'Error: {error}', file=sys.stderr)
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 52da92d98..8c57861e1 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -143,6 +143,12 @@ def execute(self, spec: workflow_module.WorkflowSpec,
 
             ready = self._find_ready_tasks()
 
+        unexecuted = set(self._task_nodes.keys()) - set(self._results.keys())
+        if unexecuted:
+            logger.error('Workflow "%s" stalled — tasks could not be scheduled (possible cycle): %s',
+                         spec.name, ', '.join(sorted(unexecuted)))
+            return False
+
         failed = [name for name, r in self._results.items() if r.exit_code != 0]
         if failed:
             logger.error('Workflow failed. Failed tasks: %s', ', '.join(failed))
@@ -366,7 +372,19 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
         docker_args.append(task_spec.image)
         docker_args += resolved_command[1:] + resolved_args
 
-        logger.debug('Docker command: %s', ' '.join(docker_args))
+        if logger.isEnabledFor(logging.DEBUG):
+            redacted_args = []
+            skip_next = False
+            for arg in docker_args:
+                if skip_next:
+                    redacted_args.append(arg.split('=', 1)[0] + '=REDACTED')
+                    skip_next = False
+                elif arg == '-e':
+                    redacted_args.append(arg)
+                    skip_next = True
+                else:
+                    redacted_args.append(arg)
+            logger.debug('Docker command: %s', ' '.join(redacted_args))
 
         try:
             process = subprocess.run(docker_args, capture_output=False)
@@ -395,7 +413,8 @@ def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
 def run_workflow_locally(spec_path: str, work_dir: str | None = None,
                          keep_work_dir: bool = False,
                          resume: bool = False,
-                         from_step: str | None = None) -> bool:
+                         from_step: str | None = None,
+                         docker_cmd: str = 'docker') -> bool:
     if (resume or from_step) and work_dir is None:
         raise ValueError(
             '--resume and --from-step require --work-dir pointing to a previous run directory.')
@@ -414,7 +433,8 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
             'Run "osmo workflow submit --dry-run -f <spec>" first to get the expanded spec,\n'
             'then save that output and run it locally.')
 
-    executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir)
+    executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                              docker_cmd=docker_cmd)
     spec = executor.load_spec(spec_text)
     success = executor.execute(spec, resume=resume or from_step is not None,
                                from_step=from_step)
diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD
index 59050591c..a9369af05 100644
--- a/src/utils/tests/BUILD
+++ b/src/utils/tests/BUILD
@@ -62,5 +62,4 @@ py_test(
         "//src/utils:local_executor",
     ],
     local = True,
-    tags = ["manual"],
 )

From d332f6caedd4d4e79d04186cdb5abddc41f29aa5 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 17:14:24 -0700
Subject: [PATCH 08/35] Enhance documentation and comments in local execution
 modules

- Added detailed docstrings to functions and classes in `local.py` and `local_executor.py` to improve code readability and maintainability.
- Updated test cases in `test_local_executor.py` with descriptive comments to clarify the purpose of each test.
- Ensured consistency in documentation style across the codebase.
---
 src/cli/local.py                       |  2 +
 src/utils/local_executor.py            | 24 ++++++++
 src/utils/tests/test_local_executor.py | 78 +++++++++++++++++++++++++-
 3 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/src/cli/local.py b/src/cli/local.py
index a481c4c56..5bf596879 100644
--- a/src/cli/local.py
+++ b/src/cli/local.py
@@ -25,6 +25,7 @@
 
 
 def setup_parser(parser: argparse._SubParsersAction):
+    """Register the 'local' subcommand and its nested 'run' action with the CLI argument parser."""
     local_parser = parser.add_parser(
         'local',
         help='Run workflows locally using Docker (no Kubernetes cluster required).')
@@ -71,6 +72,7 @@ def setup_parser(parser: argparse._SubParsersAction):
 
 
 def _run_local(service_client, args: argparse.Namespace):
+    """Execute a workflow locally via Docker using the parsed CLI arguments."""
     try:
         success = local_executor.run_workflow_locally(
             spec_path=args.workflow_file,
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 8c57861e1..4a84c3ec8 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -39,6 +39,8 @@
 
 @dataclasses.dataclass
 class TaskNode:
+    """A node in the workflow DAG, linking a task spec to its upstream and downstream dependencies."""
+
     name: str
     spec: task_module.TaskSpec
     group: str
@@ -48,6 +50,8 @@ class TaskNode:
 
 @dataclasses.dataclass
 class TaskResult:
+    """Outcome of a single task execution, capturing its exit code and output directory path."""
+
     name: str
     exit_code: int
     output_dir: str
@@ -72,6 +76,7 @@ class LocalExecutor:
     """
 
     def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str = 'docker'):
+        """Initialize the executor with a work directory, cleanup preference, and container runtime command."""
         self._work_dir = work_dir
         self._keep_work_dir = keep_work_dir
         self._docker_cmd = docker_cmd
@@ -80,6 +85,7 @@ def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str =
         self._available_gpus: int | None = None
 
     def _detect_available_gpus(self) -> int:
+        """Query nvidia-smi to count available GPUs, caching the result for subsequent calls."""
         if self._available_gpus is not None:
             return self._available_gpus
         try:
@@ -102,12 +108,14 @@ def _detect_available_gpus(self) -> int:
         return self._available_gpus
 
     def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
+        """Parse raw YAML text into a validated WorkflowSpec via the versioned spec model."""
         raw = yaml.safe_load(spec_text)
         versioned = workflow_module.VersionedWorkflowSpec(**raw)
         return versioned.workflow
 
     def execute(self, spec: workflow_module.WorkflowSpec,
                 resume: bool = False, from_step: str | None = None) -> bool:
+        """Run all tasks in topological order, returning True if the entire workflow succeeds."""
         self._build_dag(spec)
         self._validate_for_local(spec)
         self._setup_directories()
@@ -159,9 +167,11 @@ def execute(self, spec: workflow_module.WorkflowSpec,
 
     @property
     def _state_file_path(self) -> str:
+        """Absolute path to the JSON state file used for resume tracking."""
         return os.path.join(self._work_dir, STATE_FILE_NAME)
 
     def _save_state(self):
+        """Persist current task results to the state file so runs can be resumed later."""
         state = {
             'tasks': {
                 name: {'exit_code': result.exit_code, 'output_dir': result.output_dir}
@@ -173,12 +183,14 @@ def _save_state(self):
             json.dump(state, f, indent=2)
 
     def _load_state(self) -> Dict | None:
+        """Load previously saved task state from disk, returning None if no state file exists."""
         if not os.path.exists(self._state_file_path):
             return None
         with open(self._state_file_path) as f:
             return json.load(f)
 
     def _restore_completed_tasks(self, from_step: str | None = None):
+        """Reload completed tasks from a previous run, optionally invalidating from a given step onward."""
         state = self._load_state()
         if state is None:
             logger.info('No previous state found — starting from scratch')
@@ -205,6 +217,7 @@ def _restore_completed_tasks(self, from_step: str | None = None):
             logger.info('Resuming: skipping completed task "%s"', name)
 
     def _get_downstream_tasks(self, task_name: str) -> Set[str]:
+        """Return all transitive downstream dependents of the given task via BFS."""
         visited: Set[str] = set()
         queue = [task_name]
         while queue:
@@ -216,11 +229,13 @@ def _get_downstream_tasks(self, task_name: str) -> Set[str]:
         return visited
 
     def _groups(self, spec: workflow_module.WorkflowSpec) -> List[task_module.TaskGroupSpec]:
+        """Return the spec's groups, or synthesize one group per task when groups are absent."""
         if spec.groups:
             return spec.groups
         return [task_module.TaskGroupSpec(name=t.name, tasks=[t]) for t in spec.tasks]
 
     def _build_dag(self, spec: workflow_module.WorkflowSpec):
+        """Construct the internal DAG of TaskNodes from the workflow spec's tasks and input dependencies."""
         self._task_nodes.clear()
         task_to_group: Dict[str, str] = {}
 
@@ -245,6 +260,7 @@ def _build_dag(self, spec: workflow_module.WorkflowSpec):
                         self._task_nodes[upstream_task].downstream.add(task_spec.name)
 
     def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
+        """Raise ValueError if the spec uses features unsupported in local mode (datasets, URLs, credentials, etc.)."""
         unsupported_features = []
         for group in self._groups(spec):
             for task_spec in group.tasks:
@@ -279,11 +295,13 @@ def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
                 + '\n  - '.join(unsupported_features))
 
     def _setup_directories(self):
+        """Create the work directory and per-task output directories on the host filesystem."""
         os.makedirs(self._work_dir, exist_ok=True)
         for task_name in self._task_nodes:
             os.makedirs(os.path.join(self._work_dir, task_name, 'output'), exist_ok=True)
 
     def _find_ready_tasks(self) -> List[str]:
+        """Return tasks whose upstream dependencies have all completed successfully."""
         completed = set(self._results.keys())
         ready = []
         for name, node in self._task_nodes.items():
@@ -296,6 +314,7 @@ def _find_ready_tasks(self) -> List[str]:
         return ready
 
     def _cancel_downstream(self, failed_task: str):
+        """Mark all transitive downstream tasks of a failed task as cancelled (exit_code -1)."""
         visited: Set[str] = set()
         queue = [failed_task]
         while queue:
@@ -309,12 +328,14 @@ def _cancel_downstream(self, failed_task: str):
 
     def _task_gpu_count(self, task_spec: task_module.TaskSpec,
                         spec: workflow_module.WorkflowSpec) -> int:
+        """Return the number of GPUs requested by a task's resource spec, defaulting to 0."""
         resource_spec = spec.resources.get(task_spec.resource)
         if resource_spec and resource_spec.gpu:
             return resource_spec.gpu
         return 0
 
     def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskResult:
+        """Execute a single task as a Docker container, mounting inputs/outputs/files and returning the result."""
         task_spec = node.spec
         task_dir = os.path.join(self._work_dir, node.name)
         output_dir = os.path.join(task_dir, 'output')
@@ -394,6 +415,7 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             return TaskResult(name=node.name, exit_code=127, output_dir=output_dir)
 
     def _build_token_map(self, node: TaskNode, output_dir: str) -> Dict[str, str]:
+        """Build a mapping of {{token}} keys to host paths for output and each upstream input."""
         tokens: Dict[str, str] = {
             'output': output_dir,
         }
@@ -405,6 +427,7 @@ def _build_token_map(self, node: TaskNode, output_dir: str) -> Dict[str, str]:
         return tokens
 
     def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
+        """Replace all {{key}} placeholders in text with their corresponding token values."""
         for key, value in tokens.items():
             text = re.sub(r'\{\{\s*' + re.escape(key) + r'\s*\}\}', value, text)
         return text
@@ -415,6 +438,7 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
                          resume: bool = False,
                          from_step: str | None = None,
                          docker_cmd: str = 'docker') -> bool:
+    """Load a workflow spec from disk and execute it locally via Docker, managing the work directory lifecycle."""
     if (resume or from_step) and work_dir is None:
         raise ValueError(
             '--resume and --from-step require --work-dir pointing to a previous run directory.')
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 0da9ba993..673d1f2af 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -32,6 +32,7 @@
 # Helper: detect Docker availability once for the entire module
 # ---------------------------------------------------------------------------
 def _docker_available() -> bool:
+    """Return True if the Docker daemon is reachable via 'docker info', False otherwise."""
     try:
         result = subprocess.run(
             ['docker', 'info'],
@@ -54,6 +55,7 @@ class TestLoadSpec(unittest.TestCase):
     """Verify that real OSMO YAML specs are parsed correctly via the existing Pydantic models."""
 
     def test_single_task_spec(self):
+        """Parse a minimal single-task workflow and verify name, task count, and image."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: hello-osmo
@@ -71,6 +73,7 @@ def test_single_task_spec(self):
         self.assertEqual(spec.tasks[0].image, 'ubuntu:24.04')
 
     def test_serial_tasks_spec(self):
+        """Parse a two-task serial workflow and verify the task input dependency is resolved."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial-tasks
@@ -105,6 +108,7 @@ def test_serial_tasks_spec(self):
             self.assertEqual(first_input.task, 'task1')
 
     def test_groups_spec(self):
+        """Parse a grouped workflow and verify group structure and the lead task flag."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: grouped
@@ -126,6 +130,7 @@ def test_groups_spec(self):
         self.assertTrue(spec.groups[0].tasks[0].lead)
 
     def test_versioned_spec(self):
+        """Parse a spec with an explicit version field and verify it loads correctly."""
         spec_text = textwrap.dedent('''\
             version: 2
             workflow:
@@ -140,6 +145,7 @@ def test_versioned_spec(self):
         self.assertEqual(spec.name, 'versioned')
 
     def test_invalid_version_rejected(self):
+        """Reject a spec with an unsupported version number."""
         spec_text = textwrap.dedent('''\
             version: 99
             workflow:
@@ -154,6 +160,7 @@ def test_invalid_version_rejected(self):
             executor.load_spec(spec_text)
 
     def test_both_tasks_and_groups_rejected(self):
+        """Reject a spec that defines both top-level tasks and groups simultaneously."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: invalid
@@ -173,6 +180,7 @@ def test_both_tasks_and_groups_rejected(self):
             executor.load_spec(spec_text)
 
     def test_empty_workflow_rejected(self):
+        """Reject a spec with no tasks or groups defined."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: empty
@@ -182,6 +190,7 @@ def test_empty_workflow_rejected(self):
             executor.load_spec(spec_text)
 
     def test_resources_spec_parsed(self):
+        """Parse a spec with resource definitions and verify cpu/memory values."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: with-resources
@@ -201,6 +210,7 @@ def test_resources_spec_parsed(self):
         self.assertEqual(spec.resources['default'].memory, '4Gi')
 
     def test_environment_parsed(self):
+        """Parse a spec with environment variables and verify key-value pairs are preserved."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: env-test
@@ -222,9 +232,11 @@ class TestBuildDag(unittest.TestCase):
     """Verify DAG construction from task dependencies."""
 
     def _make_executor(self) -> LocalExecutor:
+        """Create a LocalExecutor with a throwaway work directory for DAG-only tests."""
         return LocalExecutor(work_dir='/tmp/unused')
 
     def test_no_dependencies(self):
+        """All tasks with no input dependencies have empty upstream and downstream sets."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: parallel
@@ -249,6 +261,7 @@ def test_no_dependencies(self):
             self.assertEqual(len(node.downstream), 0)
 
     def test_serial_chain(self):
+        """A three-task chain produces correct upstream/downstream links at each step."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial
@@ -279,6 +292,7 @@ def test_serial_chain(self):
         self.assertEqual(executor._task_nodes['third'].downstream, set())
 
     def test_diamond_dependency(self):
+        """A diamond DAG (root -> left/right -> join) wires fan-out and fan-in edges correctly."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: diamond
@@ -311,6 +325,7 @@ def test_diamond_dependency(self):
         self.assertEqual(executor._task_nodes['join'].upstream, {'left', 'right'})
 
     def test_unknown_dependency_raises(self):
+        """Referencing a non-existent upstream task raises ValueError."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: broken
@@ -328,6 +343,7 @@ def test_unknown_dependency_raises(self):
         self.assertIn('nonexistent', str(context.exception))
 
     def test_groups_with_cross_group_deps(self):
+        """Dependencies between tasks in different groups are wired correctly."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: cross-group
@@ -359,6 +375,7 @@ class TestFindReadyTasks(unittest.TestCase):
     """Verify correct identification of tasks ready to execute."""
 
     def test_all_root_tasks_ready(self):
+        """Tasks with no upstream dependencies are immediately ready."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: parallel
@@ -378,6 +395,7 @@ def test_all_root_tasks_ready(self):
         self.assertEqual(set(ready), {'a', 'b'})
 
     def test_dependent_not_ready_until_upstream_completes(self):
+        """A downstream task only becomes ready after its upstream dependency completes."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial
@@ -403,6 +421,7 @@ def test_dependent_not_ready_until_upstream_completes(self):
         self.assertEqual(ready, ['second'])
 
     def test_failed_upstream_blocks_downstream(self):
+        """A failed upstream task prevents its downstream dependents from becoming ready."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial
@@ -426,8 +445,10 @@ def test_failed_upstream_blocks_downstream(self):
 
 
 class TestCancelDownstream(unittest.TestCase):
+    """Verify that downstream tasks are cancelled when an upstream task fails."""
 
     def test_cascading_cancel(self):
+        """Cancellation of a failed task propagates to all transitive downstream dependents."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: chain
@@ -460,46 +481,55 @@ def test_cascading_cancel(self):
 
 
 class TestSubstituteTokens(unittest.TestCase):
+    """Verify {{token}} placeholder replacement in command strings and file contents."""
 
     def test_output_token(self):
+        """The {{output}} token is replaced with the task output directory path."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/work/task1/output'}
         result = executor._substitute_tokens('echo data > {{output}}/file.txt', tokens)
         self.assertEqual(result, 'echo data > /work/task1/output/file.txt')
 
     def test_input_by_index(self):
+        """The {{input:N}} token is replaced with the Nth upstream output directory."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         tokens = {'input:0': '/work/upstream/output'}
         result = executor._substitute_tokens('cat {{input:0}}/data.csv', tokens)
         self.assertEqual(result, 'cat /work/upstream/output/data.csv')
 
     def test_input_by_name(self):
+        """The {{input:taskname}} token is replaced with the named task's output directory."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         tokens = {'input:task1': '/work/task1/output'}
         result = executor._substitute_tokens('cat {{ input:task1 }}/data.csv', tokens)
         self.assertEqual(result, 'cat /work/task1/output/data.csv')
 
     def test_whitespace_around_tokens(self):
+        """Whitespace inside {{ token }} braces is tolerated during substitution."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/out'}
         result = executor._substitute_tokens('{{ output }}/file.txt', tokens)
         self.assertEqual(result, '/out/file.txt')
 
     def test_multiple_tokens_in_one_string(self):
+        """Multiple distinct tokens in the same string are all replaced."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/out', 'input:0': '/in0'}
         result = executor._substitute_tokens('cp {{input:0}}/src {{output}}/dst', tokens)
         self.assertEqual(result, 'cp /in0/src /out/dst')
 
     def test_no_tokens_unchanged(self):
+        """Text without any token placeholders passes through unchanged."""
         executor = LocalExecutor(work_dir='/tmp/unused')
         result = executor._substitute_tokens('plain text no tokens', {})
         self.assertEqual(result, 'plain text no tokens')
 
 
 class TestBuildTokenMap(unittest.TestCase):
+    """Verify that token maps are built correctly from task DAG relationships."""
 
     def test_output_only(self):
+        """A task with no inputs produces a token map containing only the output key."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: simple
@@ -518,6 +548,7 @@ def test_output_only(self):
         self.assertEqual(len(tokens), 1)
 
     def test_with_upstream_inputs(self):
+        """A task with upstream inputs gets both index-based and name-based input tokens."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial
@@ -550,9 +581,11 @@ class TestValidateForLocal(unittest.TestCase):
     """Verify that unsupported features are detected and rejected."""
 
     def _make_executor(self) -> LocalExecutor:
+        """Create a LocalExecutor with a throwaway work directory for validation-only tests."""
         return LocalExecutor(work_dir='/tmp/unused')
 
     def test_simple_spec_passes(self):
+        """A spec using only task-to-task inputs passes local validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -567,6 +600,7 @@ def test_simple_spec_passes(self):
         executor._validate_for_local(spec)
 
     def test_dataset_input_rejected(self):
+        """A spec with dataset inputs is rejected as unsupported in local mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -586,6 +620,7 @@ def test_dataset_input_rejected(self):
         self.assertIn('dataset', str(context.exception))
 
     def test_url_input_rejected(self):
+        """A spec with URL inputs is rejected as unsupported in local mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -604,6 +639,7 @@ def test_url_input_rejected(self):
         self.assertIn('URL', str(context.exception))
 
     def test_dataset_output_rejected(self):
+        """A spec with dataset outputs is rejected as unsupported in local mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -623,6 +659,7 @@ def test_dataset_output_rejected(self):
         self.assertIn('dataset', str(context.exception).lower())
 
     def test_url_output_rejected(self):
+        """A spec with URL outputs is rejected as unsupported in local mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -641,6 +678,7 @@ def test_url_output_rejected(self):
         self.assertIn('object storage', str(context.exception).lower())
 
     def test_multiple_unsupported_features_all_reported(self):
+        """All unsupported features across multiple tasks are reported in a single error."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -667,6 +705,7 @@ def test_multiple_unsupported_features_all_reported(self):
         self.assertIn('task2', error_message)
 
     def test_task_deps_only_passes(self):
+        """A spec with only task-to-task dependencies passes local validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -686,6 +725,7 @@ def test_task_deps_only_passes(self):
         executor._validate_for_local(spec)
 
     def test_files_and_env_pass(self):
+        """A spec using files and environment variables passes local validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -706,8 +746,10 @@ def test_files_and_env_pass(self):
 
 
 class TestJinjaTemplateDetection(unittest.TestCase):
+    """Verify that specs containing Jinja template markers are rejected before execution."""
 
     def _write_temp_spec(self, content: str) -> str:
+        """Write YAML content to a temporary file and return its path."""
         f = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
         f.write(content)
         f.flush()
@@ -715,6 +757,7 @@ def _write_temp_spec(self, content: str) -> str:
         return f.name
 
     def test_jinja_block_detected(self):
+        """A spec containing {%% %%} Jinja block tags is rejected."""
         path = self._write_temp_spec(textwrap.dedent('''\
             workflow:
               name: {%% if true %%}test{%% endif %%}
@@ -731,6 +774,7 @@ def test_jinja_block_detected(self):
             os.unlink(path)
 
     def test_jinja_comment_detected(self):
+        """A spec containing {# #} Jinja comment tags is rejected."""
         path = self._write_temp_spec(textwrap.dedent('''\
             {# A comment #}
             workflow:
@@ -748,6 +792,7 @@ def test_jinja_comment_detected(self):
             os.unlink(path)
 
     def test_default_values_section_detected(self):
+        """A spec containing a 'default-values' section is rejected as a Jinja template."""
         path = self._write_temp_spec(textwrap.dedent('''\
             workflow:
               name: "{{experiment_name}}"
@@ -777,12 +822,15 @@ class TestDockerExecution(unittest.TestCase):
     """
 
     def setUp(self):
+        """Create a temporary work directory for each Docker execution test."""
         self.work_dir = tempfile.mkdtemp(prefix='osmo-local-test-')
 
     def tearDown(self):
+        """Remove the temporary work directory after each test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
     def _execute_spec(self, spec_text: str) -> bool:
+        """Parse and execute a workflow spec string, returning the success status."""
         executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         return executor.execute(spec)
@@ -790,6 +838,7 @@ def _execute_spec(self, spec_text: str) -> bool:
     # ---- Single task tests ----
 
     def test_hello_world(self):
+        """Run a minimal single-task workflow that echoes a message."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: hello-osmo
@@ -801,6 +850,7 @@ def test_hello_world(self):
         self.assertTrue(self._execute_spec(spec_text))
 
     def test_single_task_with_args(self):
+        """Run a task with separate command and args fields."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: args-test
@@ -813,6 +863,7 @@ def test_single_task_with_args(self):
         self.assertTrue(self._execute_spec(spec_text))
 
     def test_task_failure_returns_false(self):
+        """A task that exits with a non-zero code causes execute() to return False."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: will-fail
@@ -826,6 +877,7 @@ def test_task_failure_returns_false(self):
     # ---- Environment variable tests ----
 
     def test_environment_variables(self):
+        """Environment variables declared in the spec are passed to the Docker container."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: env-test
@@ -843,6 +895,7 @@ def test_environment_variables(self):
     # ---- Files mount tests ----
 
     def test_inline_file_mounted(self):
+        """An inline file declared in the spec is mounted and executable inside the container."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: files-test
@@ -858,6 +911,7 @@ def test_inline_file_mounted(self):
         self.assertTrue(self._execute_spec(spec_text))
 
     def test_multiple_files_mounted(self):
+        """Multiple inline files at different paths are all mounted into the container."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: multi-files
@@ -878,6 +932,7 @@ def test_multiple_files_mounted(self):
     # ---- Data output tests ----
 
     def test_output_directory_writable(self):
+        """The {{output}} directory is writable from inside the container and persists on the host."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: output-test
@@ -896,6 +951,7 @@ def test_output_directory_writable(self):
     # ---- Serial data flow tests ----
 
     def test_serial_data_flow_two_tasks(self):
+        """Data written to {{output}} by a producer is readable via {{input:0}} by the consumer."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial-data
@@ -959,7 +1015,7 @@ def test_serial_chain_three_tasks(self):
     # ---- Parallel execution tests ----
 
     def test_parallel_independent_tasks(self):
-        """Mimics cookbook/tutorials/parallel_tasks.yaml"""
+        """Independent tasks with no dependencies all execute and produce their respective outputs."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: parallel-tasks
@@ -986,6 +1042,7 @@ def test_parallel_independent_tasks(self):
     # ---- Diamond DAG tests ----
 
     def test_diamond_dag(self):
+        """A diamond-shaped DAG executes correctly with fan-out and fan-in data flow."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: diamond
@@ -1025,6 +1082,7 @@ def test_diamond_dag(self):
     # ---- Failure propagation tests ----
 
     def test_failure_cancels_downstream(self):
+        """A failed task prevents its downstream dependent from running."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: fail-chain
@@ -1043,6 +1101,7 @@ def test_failure_cancels_downstream(self):
         self.assertFalse(os.path.exists(output_file))
 
     def test_parallel_failure_does_not_affect_independent_branch(self):
+        """When one branch of a parallel DAG fails, the executor stops with overall failure."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: partial-fail
@@ -1071,6 +1130,7 @@ def test_parallel_failure_does_not_affect_independent_branch(self):
     # ---- Groups (ganged tasks) tests ----
 
     def test_group_with_single_task(self):
+        """A group containing a single lead task executes and produces output."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: single-group
@@ -1125,6 +1185,7 @@ def test_groups_with_data_flow(self):
     # ---- Input by task name tests ----
 
     def test_input_by_task_name(self):
+        """The {{input:taskname}} token resolves to the named upstream task's output directory."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: named-input
@@ -1198,6 +1259,7 @@ def test_resources_ignored_gracefully(self):
     # ---- Docker-not-found handling ----
 
     def test_docker_not_found_graceful_failure(self):
+        """Using a non-existent docker binary results in a graceful failure rather than a crash."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: no-docker
@@ -1217,6 +1279,7 @@ def test_docker_not_found_graceful_failure(self):
     # ---- Alternative container runtime ----
 
     def test_custom_docker_command(self):
+        """An explicitly specified docker command is used to run the container."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: custom-cmd
@@ -1248,12 +1311,15 @@ class TestCookbookSpecs(unittest.TestCase):
                                'cookbook', 'tutorials')
 
     def setUp(self):
+        """Create a temporary work directory for cookbook spec tests."""
         self.work_dir = tempfile.mkdtemp(prefix='osmo-local-cookbook-')
 
     def tearDown(self):
+        """Remove the temporary work directory after each cookbook test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
     def _run_cookbook_spec(self, filename: str) -> bool:
+        """Execute a cookbook tutorial spec file through the local executor."""
         spec_path = os.path.join(self.COOKBOOK_DIR, filename)
         if not os.path.exists(spec_path):
             self.skipTest(f'Cookbook file not found: {spec_path}')
@@ -1264,15 +1330,19 @@ def _run_cookbook_spec(self, filename: str) -> bool:
         )
 
     def test_hello_world_yaml(self):
+        """Execute the hello_world.yaml cookbook tutorial spec."""
         self.assertTrue(self._run_cookbook_spec('hello_world.yaml'))
 
     def test_parallel_tasks_yaml(self):
+        """Execute the parallel_tasks.yaml cookbook tutorial spec."""
         self.assertTrue(self._run_cookbook_spec('parallel_tasks.yaml'))
 
     def test_serial_workflow_yaml(self):
+        """Execute the serial_workflow.yaml cookbook tutorial spec."""
         self.assertTrue(self._run_cookbook_spec('serial_workflow.yaml'))
 
     def test_resources_basic_yaml(self):
+        """Execute the resources_basic.yaml cookbook tutorial spec."""
         self.assertTrue(self._run_cookbook_spec('resources_basic.yaml'))
 
     def test_combination_workflow_simple_yaml(self):
@@ -1323,12 +1393,15 @@ class TestRunWorkflowLocally(unittest.TestCase):
     """Test the top-level run_workflow_locally() convenience function."""
 
     def setUp(self):
+        """Create a temporary work directory for run_workflow_locally tests."""
         self.work_dir = tempfile.mkdtemp(prefix='osmo-local-func-')
 
     def tearDown(self):
+        """Remove the temporary work directory after each test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
     def test_success_cleans_up_when_not_keeping(self):
+        """On success with keep_work_dir=False, the work directory is removed."""
         work_dir = tempfile.mkdtemp(prefix='osmo-local-cleanup-')
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
             f.write(textwrap.dedent('''\
@@ -1354,6 +1427,7 @@ def test_success_cleans_up_when_not_keeping(self):
                 shutil.rmtree(work_dir, ignore_errors=True)
 
     def test_failure_preserves_work_dir(self):
+        """On failure, the work directory is preserved for debugging regardless of the keep flag."""
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
             f.write(textwrap.dedent('''\
                 workflow:
@@ -1376,6 +1450,7 @@ def test_failure_preserves_work_dir(self):
             os.unlink(spec_path)
 
     def test_keep_flag_preserves_on_success(self):
+        """With keep_work_dir=True, the work directory is preserved even on success."""
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
             f.write(textwrap.dedent('''\
                 workflow:
@@ -1398,6 +1473,7 @@ def test_keep_flag_preserves_on_success(self):
             os.unlink(spec_path)
 
     def test_nonexistent_file_raises(self):
+        """Passing a non-existent spec file path raises FileNotFoundError."""
         with self.assertRaises(FileNotFoundError):
             run_workflow_locally(spec_path='/nonexistent/path/spec.yaml')
 

From 6297dc9393181f60bf46312e27064ec37ee57239 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 17:26:55 -0700
Subject: [PATCH 09/35] Refactor file handling in LocalExecutor for UTF-8
 encoding

- Updated file operations in `local_executor.py` to pass `encoding='utf-8'` explicitly when reading and writing files, so behavior no longer depends on the platform's default locale encoding.
- Tightened assertions in `test_local_executor.py` to expect `ValueError` instead of a generic `Exception`, so the tests pin the specific error type.
- Corrected the Jinja block marker from `{%%` to `{%` in the template detection in `local_executor.py` and in the corresponding test docstring and test spec.
---
 src/utils/local_executor.py            | 10 +++++-----
 src/utils/tests/test_local_executor.py | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 4a84c3ec8..3f16e4e03 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -179,14 +179,14 @@ def _save_state(self):
                 if result.exit_code != -1
             }
         }
-        with open(self._state_file_path, 'w') as f:
+        with open(self._state_file_path, 'w', encoding='utf-8') as f:
             json.dump(state, f, indent=2)
 
     def _load_state(self) -> Dict | None:
         """Load previously saved task state from disk, returning None if no state file exists."""
         if not os.path.exists(self._state_file_path):
             return None
-        with open(self._state_file_path) as f:
+        with open(self._state_file_path, encoding='utf-8') as f:
             return json.load(f)
 
     def _restore_completed_tasks(self, from_step: str | None = None):
@@ -348,7 +348,7 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             resolved_contents = self._substitute_tokens(file_spec.contents, token_map)
             host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))
             os.makedirs(os.path.dirname(host_path), exist_ok=True)
-            with open(host_path, 'w') as f:
+            with open(host_path, 'w', encoding='utf-8') as f:
                 f.write(resolved_contents)
 
         resolved_command = [self._substitute_tokens(c, token_map) for c in task_spec.command]
@@ -447,10 +447,10 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
         work_dir = tempfile.mkdtemp(prefix='osmo-local-')
         logger.info('Using temporary work directory: %s', work_dir)
 
-    with open(spec_path) as f:
+    with open(spec_path, encoding='utf-8') as f:
         spec_text = f.read()
 
-    template_markers = ('{%%', '{#', 'default-values')
+    template_markers = ('{%', '{#', 'default-values')
     if any(marker in spec_text for marker in template_markers):
         raise ValueError(
             'This spec uses Jinja templates which require server-side expansion.\n'
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 673d1f2af..5761c6822 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -156,7 +156,7 @@ def test_invalid_version_rejected(self):
                 command: ["echo", "ok"]
         ''')
         executor = LocalExecutor(work_dir='/tmp/unused')
-        with self.assertRaises(Exception):
+        with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
     def test_both_tasks_and_groups_rejected(self):
@@ -176,7 +176,7 @@ def test_both_tasks_and_groups_rejected(self):
                   command: ["echo"]
         ''')
         executor = LocalExecutor(work_dir='/tmp/unused')
-        with self.assertRaises(Exception):
+        with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
     def test_empty_workflow_rejected(self):
@@ -186,7 +186,7 @@ def test_empty_workflow_rejected(self):
               name: empty
         ''')
         executor = LocalExecutor(work_dir='/tmp/unused')
-        with self.assertRaises(Exception):
+        with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
     def test_resources_spec_parsed(self):
@@ -757,10 +757,10 @@ def _write_temp_spec(self, content: str) -> str:
         return f.name
 
     def test_jinja_block_detected(self):
-        """A spec containing {%% %%} Jinja block tags is rejected."""
+        """A spec containing {% %} Jinja block tags is rejected."""
         path = self._write_temp_spec(textwrap.dedent('''\
             workflow:
-              name: {%% if true %%}test{%% endif %%}
+              name: {% if true %}test{% endif %}
               tasks:
               - name: task
                 image: alpine:3.18

From 0dffb7976f522e74d7dd70f1684b5a3cf9975584 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 18:23:54 -0700
Subject: [PATCH 10/35] Enhance error handling and update documentation in
 local execution modules

- Expanded exception handling in `local.py` to include `FileNotFoundError` and `PermissionError` for improved robustness.
- Added validation in `local_executor.py` (`_validate_for_local`) that rejects privileged containers and host networking as unsupported features in local mode.
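- Changed `run_workflow_locally` in `local_executor.py` to delete the work directory on success only when it created that directory itself, and modified the test case in `test_local_executor.py` to assert that caller-supplied work directories are preserved on success.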
---
 src/cli/local.py                       |  4 ++--
 src/utils/local_executor.py            | 15 ++++++++++++---
 src/utils/tests/test_local_executor.py |  6 +++---
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/cli/local.py b/src/cli/local.py
index 5bf596879..d446e26cf 100644
--- a/src/cli/local.py
+++ b/src/cli/local.py
@@ -1,5 +1,5 @@
 """
-SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ def _run_local(service_client, args: argparse.Namespace):
             from_step=args.from_step,
             docker_cmd=args.docker_cmd,
         )
-    except ValueError as error:
+    except (ValueError, FileNotFoundError, PermissionError) as error:
         print(f'Error: {error}', file=sys.stderr)
         sys.exit(1)
 
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 3f16e4e03..9f884f20c 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -1,5 +1,5 @@
 """
-SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -289,6 +289,14 @@ def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
                     unsupported_features.append(
                         f'Task "{task_spec.name}": volumeMounts require cluster-level host paths')
 
+                if task_spec.privileged:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": privileged containers are not supported in local mode')
+
+                if task_spec.hostNetwork:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": hostNetwork is not supported in local mode')
+
         if unsupported_features:
             raise ValueError(
                 'The following features are not supported in local execution mode:\n  - '
@@ -443,7 +451,8 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
         raise ValueError(
             '--resume and --from-step require --work-dir pointing to a previous run directory.')
 
-    if work_dir is None:
+    created_work_dir = work_dir is None
+    if created_work_dir:
         work_dir = tempfile.mkdtemp(prefix='osmo-local-')
         logger.info('Using temporary work directory: %s', work_dir)
 
@@ -463,7 +472,7 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
     success = executor.execute(spec, resume=resume or from_step is not None,
                                from_step=from_step)
 
-    if not keep_work_dir and success:
+    if created_work_dir and not keep_work_dir and success:
         logger.info('Cleaning up work directory: %s', work_dir)
         shutil.rmtree(work_dir, ignore_errors=True)
     elif not success:
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 5761c6822..917d39172 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -1400,8 +1400,8 @@ def tearDown(self):
         """Remove the temporary work directory after each test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
-    def test_success_cleans_up_when_not_keeping(self):
-        """On success with keep_work_dir=False, the work directory is removed."""
+    def test_caller_supplied_work_dir_preserved_on_success(self):
+        """A caller-supplied work_dir is never deleted, even with keep_work_dir=False."""
         work_dir = tempfile.mkdtemp(prefix='osmo-local-cleanup-')
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
             f.write(textwrap.dedent('''\
@@ -1420,7 +1420,7 @@ def test_success_cleans_up_when_not_keeping(self):
                 keep_work_dir=False,
             )
             self.assertTrue(result)
-            self.assertFalse(os.path.exists(work_dir))
+            self.assertTrue(os.path.exists(work_dir))
         finally:
             os.unlink(spec_path)
             if os.path.exists(work_dir):

From 0a7e15cb3deb4bc4b329098549e5311c0d58413b Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 18:39:45 -0700
Subject: [PATCH 11/35] Update copyright line in test_local_executor.py to
 comply with pylint standards

---
 src/utils/tests/test_local_executor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 917d39172..872aa11ac 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -1,5 +1,5 @@
 """
-SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

From 84a26daf86dbfd5c7d4de898b2bb8ca6b98ed1ff Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 18:53:44 -0700
Subject: [PATCH 12/35] Add shared memory size support for GPU tasks in local
 execution

- Introduced `--shm-size` argument in the CLI for specifying shared memory size for GPU containers, defaulting to 16g.
- Updated `LocalExecutor` to accept and utilize the shared memory size during Docker command construction.
- Added unit tests verifying that GPU tasks receive the default shared memory size, that a custom value overrides it, and that no `--shm-size` argument is passed for non-GPU tasks.
---
 src/cli/local.py                       |  8 +++
 src/utils/local_executor.py            | 14 +++-
 src/utils/tests/test_local_executor.py | 90 ++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/src/cli/local.py b/src/cli/local.py
index d446e26cf..67eef4ca4 100644
--- a/src/cli/local.py
+++ b/src/cli/local.py
@@ -68,6 +68,13 @@ def setup_parser(parser: argparse._SubParsersAction):
         help='Resume from a specific task, re-running it and all downstream tasks. '
              'Tasks upstream of the specified step are skipped if they completed '
              'successfully. Requires --work-dir pointing to the previous run directory.')
+    run_parser.add_argument(
+        '--shm-size',
+        dest='shm_size',
+        default=None,
+        help='Shared memory size for GPU containers (e.g. 16g, 32g). '
+             'Defaults to 16g for tasks that request GPUs. '
+             'PyTorch DataLoader workers require large shared memory.')
     run_parser.set_defaults(func=_run_local)
 
 
@@ -81,6 +88,7 @@ def _run_local(service_client, args: argparse.Namespace):
             resume=args.resume,
             from_step=args.from_step,
             docker_cmd=args.docker_cmd,
+            shm_size=args.shm_size,
         )
     except (ValueError, FileNotFoundError, PermissionError) as error:
         print(f'Error: {error}', file=sys.stderr)
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 9f884f20c..d5c8351d1 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -75,11 +75,15 @@ class LocalExecutor:
       - Templated specs with Jinja (require server-side expansion; use --dry-run first)
     """
 
-    def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str = 'docker'):
+    DEFAULT_SHM_SIZE = '16g'
+
+    def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str = 'docker',
+                 shm_size: str | None = None):
         """Initialize the executor with a work directory, cleanup preference, and container runtime command."""
         self._work_dir = work_dir
         self._keep_work_dir = keep_work_dir
         self._docker_cmd = docker_cmd
+        self._shm_size = shm_size
         self._task_nodes: Dict[str, TaskNode] = {}
         self._results: Dict[str, TaskResult] = {}
         self._available_gpus: int | None = None
@@ -380,6 +384,9 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
+            shm_size = self._shm_size or self.DEFAULT_SHM_SIZE
+            docker_args += ['--shm-size', shm_size]
+
         for key, value in task_spec.environment.items():
             resolved_value = self._substitute_tokens(value, token_map)
             docker_args += ['-e', f'{key}={resolved_value}']
@@ -445,7 +452,8 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
                          keep_work_dir: bool = False,
                          resume: bool = False,
                          from_step: str | None = None,
-                         docker_cmd: str = 'docker') -> bool:
+                         docker_cmd: str = 'docker',
+                         shm_size: str | None = None) -> bool:
     """Load a workflow spec from disk and execute it locally via Docker, managing the work directory lifecycle."""
     if (resume or from_step) and work_dir is None:
         raise ValueError(
@@ -467,7 +475,7 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
             'then save that output and run it locally.')
 
     executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                              docker_cmd=docker_cmd)
+                              docker_cmd=docker_cmd, shm_size=shm_size)
     spec = executor.load_spec(spec_text)
     success = executor.execute(spec, resume=resume or from_step is not None,
                                from_step=from_step)
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 872aa11ac..82b49a98f 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -745,6 +745,96 @@ def test_files_and_env_pass(self):
         executor._validate_for_local(spec)
 
 
+class TestShmSize(unittest.TestCase):
+    """Verify that --shm-size is passed to Docker for GPU tasks."""
+
+    def setUp(self):
+        """Create a temporary work directory for shm-size tests."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-shm-')
+
+    def tearDown(self):
+        """Remove the temporary work directory after each test."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    @mock.patch('subprocess.run')
+    def test_gpu_task_gets_default_shm_size(self, mock_run):
+        """A GPU task includes --shm-size with the default value when none is specified."""
+        mock_run.return_value = mock.Mock(returncode=0, stdout='0\n')
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: shm-test
+              resources:
+                gpu-resource:
+                  gpu: 1
+              tasks:
+              - name: train
+                image: pytorch:latest
+                resource: gpu-resource
+                command: ["python", "train.py"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['train']
+        executor._run_task(node, spec)
+
+        docker_call_args = mock_run.call_args_list[-1][0][0]
+        self.assertIn('--shm-size', docker_call_args)
+        shm_index = docker_call_args.index('--shm-size')
+        self.assertEqual(docker_call_args[shm_index + 1], '16g')
+
+    @mock.patch('subprocess.run')
+    def test_gpu_task_gets_custom_shm_size(self, mock_run):
+        """A GPU task uses the user-specified --shm-size value."""
+        mock_run.return_value = mock.Mock(returncode=0, stdout='0\n')
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: shm-test
+              resources:
+                gpu-resource:
+                  gpu: 1
+              tasks:
+              - name: train
+                image: pytorch:latest
+                resource: gpu-resource
+                command: ["python", "train.py"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='32g')
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['train']
+        executor._run_task(node, spec)
+
+        docker_call_args = mock_run.call_args_list[-1][0][0]
+        self.assertIn('--shm-size', docker_call_args)
+        shm_index = docker_call_args.index('--shm-size')
+        self.assertEqual(docker_call_args[shm_index + 1], '32g')
+
+    @mock.patch('subprocess.run')
+    def test_non_gpu_task_has_no_shm_size(self, mock_run):
+        """A task without GPU resources does not include --shm-size in Docker args."""
+        mock_run.return_value = mock.Mock(returncode=0)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: no-gpu
+              tasks:
+              - name: preprocess
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['preprocess']
+        executor._run_task(node, spec)
+
+        docker_call_args = mock_run.call_args[0][0]
+        self.assertNotIn('--shm-size', docker_call_args)
+
+
 class TestJinjaTemplateDetection(unittest.TestCase):
     """Verify that specs containing Jinja template markers are rejected before execution."""
 

From 05f4ea6a3e7c599e1e3d2ca91d6db288cab7c758 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 19:19:02 -0700
Subject: [PATCH 13/35] Add tutorial specs filegroup and enhance local executor
 tests

- Created a new `tutorial_specs` filegroup in the `cookbook/tutorials/BUILD` to include YAML specifications.
- Updated the `BUILD` file in `src/utils/tests` to include the new `tutorial_specs` as data for local tests.
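- Added a new test class (`TestValidateForLocalRemainingBranches`) in `test_local_executor.py` to verify that `_validate_for_local` rejects the unsupported task fields credentials, checkpoint, volumeMounts, privileged, and hostNetwork with a descriptive error.
- Moved the Docker-not-found, unsupported-cookbook-spec, and nonexistent-spec-file tests out of the integration test sections into new test classes that do not require Docker, so unsupported features are verified to be rejected before any container is started; a missing cookbook file now fails the test instead of skipping it.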
---
 cookbook/tutorials/BUILD               |   5 +
 src/utils/tests/BUILD                  |   3 +
 src/utils/tests/test_local_executor.py | 245 +++++++++++++++++++------
 3 files changed, 195 insertions(+), 58 deletions(-)
 create mode 100644 cookbook/tutorials/BUILD

diff --git a/cookbook/tutorials/BUILD b/cookbook/tutorials/BUILD
new file mode 100644
index 000000000..d56c526f4
--- /dev/null
+++ b/cookbook/tutorials/BUILD
@@ -0,0 +1,5 @@
+filegroup(
+    name = "tutorial_specs",
+    srcs = glob(["*.yaml"]),
+    visibility = ["//src/utils/tests:__pkg__"],
+)
diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD
index a9369af05..efe72682f 100644
--- a/src/utils/tests/BUILD
+++ b/src/utils/tests/BUILD
@@ -61,5 +61,8 @@ py_test(
     deps = [
         "//src/utils:local_executor",
     ],
+    data = [
+        "//cookbook/tutorials:tutorial_specs",
+    ],
     local = True,
 )
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 82b49a98f..25ccac7cb 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -745,6 +745,89 @@ def test_files_and_env_pass(self):
         executor._validate_for_local(spec)
 
 
+class TestValidateForLocalRemainingBranches(unittest.TestCase):
+    """Verify that _validate_for_local rejects credentials, checkpoint, volumeMounts, privileged, and hostNetwork."""
+
+    _UNSUPPORTED_SPECS = {
+        'credentials': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  tasks:
+                  - name: task
+                    image: ubuntu:24.04
+                    command: ["echo"]
+                    credentials:
+                      my-secret: NGC_API_KEY
+            '''),
+            'expected_substring': 'credentials',
+        },
+        'checkpoint': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  tasks:
+                  - name: task
+                    image: ubuntu:24.04
+                    command: ["echo"]
+                    checkpoint:
+                    - path: /output/model
+                      url: s3://bucket/checkpoints/
+                      frequency: 300
+            '''),
+            'expected_substring': 'checkpoint',
+        },
+        'volumeMounts': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  tasks:
+                  - name: task
+                    image: ubuntu:24.04
+                    command: ["echo"]
+                    volumeMounts:
+                    - "/data:/data:ro"
+            '''),
+            'expected_substring': 'volumeMounts',
+        },
+        'privileged': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  tasks:
+                  - name: task
+                    image: ubuntu:24.04
+                    command: ["echo"]
+                    privileged: true
+            '''),
+            'expected_substring': 'privileged',
+        },
+        'hostNetwork': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  tasks:
+                  - name: task
+                    image: ubuntu:24.04
+                    command: ["echo"]
+                    hostNetwork: true
+            '''),
+            'expected_substring': 'hostNetwork',
+        },
+    }
+
+    def test_unsupported_fields_rejected(self):
+        """Each unsupported task-level field is detected and rejected with a descriptive error."""
+        for feature, case in self._UNSUPPORTED_SPECS.items():
+            with self.subTest(feature=feature):
+                executor = LocalExecutor(work_dir='/tmp/unused')
+                spec = executor.load_spec(case['yaml'])
+                executor._build_dag(spec)
+                with self.assertRaises(ValueError) as context:
+                    executor._validate_for_local(spec)
+                self.assertIn(case['expected_substring'], str(context.exception))
+
+
 class TestShmSize(unittest.TestCase):
     """Verify that --shm-size is passed to Docker for GPU tasks."""
 
@@ -901,6 +984,108 @@ def test_default_values_section_detected(self):
             os.unlink(path)
 
 
+# ============================================================================
+# Tests that exercise error paths without requiring Docker
+# ============================================================================
+class TestDockerNotFoundHandling(unittest.TestCase):
+    """Verify graceful failure when Docker is not available (no Docker required to run)."""
+
+    def setUp(self):
+        """Create a temporary work directory."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-test-')
+
+    def tearDown(self):
+        """Remove the temporary work directory."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_docker_not_found_graceful_failure(self):
+        """Using a non-existent docker binary results in a graceful failure rather than a crash."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: no-docker
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(
+            work_dir=self.work_dir,
+            keep_work_dir=True,
+            docker_cmd='nonexistent-docker-binary-12345',
+        )
+        spec = executor.load_spec(spec_text)
+        self.assertFalse(executor.execute(spec))
+
+
+class TestCookbookSpecValidation(unittest.TestCase):
+    """
+    Validate that cookbook specs using unsupported features are rejected
+    before any container is started (no Docker required to run).
+    """
+
+    COOKBOOK_DIR = os.path.join(os.path.dirname(__file__), '..', '..', '..',
+                               'cookbook', 'tutorials')
+
+    def setUp(self):
+        """Create a temporary work directory for cookbook validation tests."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-cookbook-')
+
+    def tearDown(self):
+        """Remove the temporary work directory after each test."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def _run_cookbook_spec(self, filename: str) -> bool:
+        """Execute a cookbook tutorial spec file through the local executor."""
+        spec_path = os.path.join(self.COOKBOOK_DIR, filename)
+        self.assertTrue(os.path.exists(spec_path),
+                        f'Cookbook file not found: {spec_path}')
+        return run_workflow_locally(
+            spec_path=spec_path,
+            work_dir=self.work_dir,
+            keep_work_dir=True,
+        )
+
+    def test_unsupported_spec_data_download(self):
+        """data_download.yaml uses URL inputs — verify it is cleanly rejected."""
+        with self.assertRaises(ValueError) as context:
+            self._run_cookbook_spec('data_download.yaml')
+        self.assertIn('URL', str(context.exception))
+
+    def test_unsupported_spec_data_upload(self):
+        """data_upload.yaml uses URL outputs — verify it is cleanly rejected."""
+        with self.assertRaises(ValueError) as context:
+            self._run_cookbook_spec('data_upload.yaml')
+        self.assertIn('object storage', str(context.exception).lower())
+
+    def test_unsupported_spec_dataset_upload(self):
+        """dataset_upload.yaml uses dataset outputs — verify it is cleanly rejected."""
+        with self.assertRaises(ValueError) as context:
+            self._run_cookbook_spec('dataset_upload.yaml')
+        self.assertIn('dataset', str(context.exception).lower())
+
+    def test_unsupported_spec_template(self):
+        """template_hello_world.yaml uses default-values templating — verify it is rejected."""
+        spec_path = os.path.join(self.COOKBOOK_DIR, 'template_hello_world.yaml')
+        self.assertTrue(os.path.exists(spec_path),
+                        f'Cookbook file not found: {spec_path}')
+        with self.assertRaises(ValueError) as context:
+            run_workflow_locally(
+                spec_path=spec_path,
+                work_dir=self.work_dir,
+                keep_work_dir=True,
+            )
+        self.assertIn('Jinja', str(context.exception))
+
+
+class TestRunWorkflowLocallyErrors(unittest.TestCase):
+    """Test error handling in run_workflow_locally() that does not require Docker."""
+
+    def test_nonexistent_file_raises(self):
+        """Passing a non-existent spec file path raises FileNotFoundError."""
+        with self.assertRaises(FileNotFoundError):
+            run_workflow_locally(spec_path='/nonexistent/path/spec.yaml')
+
+
 # ============================================================================
 # Integration tests — require Docker; test actual container execution
 # ============================================================================
@@ -1346,26 +1531,6 @@ def test_resources_ignored_gracefully(self):
         ''')
         self.assertTrue(self._execute_spec(spec_text))
 
-    # ---- Docker-not-found handling ----
-
-    def test_docker_not_found_graceful_failure(self):
-        """Using a non-existent docker binary results in a graceful failure rather than a crash."""
-        spec_text = textwrap.dedent('''\
-            workflow:
-              name: no-docker
-              tasks:
-              - name: task
-                image: alpine:3.18
-                command: ["echo", "ok"]
-        ''')
-        executor = LocalExecutor(
-            work_dir=self.work_dir,
-            keep_work_dir=True,
-            docker_cmd='nonexistent-docker-binary-12345',
-        )
-        spec = executor.load_spec(spec_text)
-        self.assertFalse(executor.execute(spec))
-
     # ---- Alternative container runtime ----
 
     def test_custom_docker_command(self):
@@ -1411,8 +1576,8 @@ def tearDown(self):
     def _run_cookbook_spec(self, filename: str) -> bool:
         """Execute a cookbook tutorial spec file through the local executor."""
         spec_path = os.path.join(self.COOKBOOK_DIR, filename)
-        if not os.path.exists(spec_path):
-            self.skipTest(f'Cookbook file not found: {spec_path}')
+        self.assertTrue(os.path.exists(spec_path),
+                        f'Cookbook file not found: {spec_path}')
         return run_workflow_locally(
             spec_path=spec_path,
             work_dir=self.work_dir,
@@ -1443,37 +1608,6 @@ def test_combination_workflow_simple_yaml(self):
         """
         self.skipTest('Contains sleep 120; covered by test_groups_with_data_flow')
 
-    def test_unsupported_spec_data_download(self):
-        """data_download.yaml uses URL inputs — verify it is cleanly rejected."""
-        with self.assertRaises(ValueError) as context:
-            self._run_cookbook_spec('data_download.yaml')
-        self.assertIn('URL', str(context.exception))
-
-    def test_unsupported_spec_data_upload(self):
-        """data_upload.yaml uses URL outputs — verify it is cleanly rejected."""
-        with self.assertRaises(ValueError) as context:
-            self._run_cookbook_spec('data_upload.yaml')
-        self.assertIn('object storage', str(context.exception).lower())
-
-    def test_unsupported_spec_dataset_upload(self):
-        """dataset_upload.yaml uses dataset outputs — verify it is cleanly rejected."""
-        with self.assertRaises(ValueError) as context:
-            self._run_cookbook_spec('dataset_upload.yaml')
-        self.assertIn('dataset', str(context.exception).lower())
-
-    def test_unsupported_spec_template(self):
-        """template_hello_world.yaml uses default-values templating — verify it is rejected."""
-        spec_path = os.path.join(self.COOKBOOK_DIR, 'template_hello_world.yaml')
-        if not os.path.exists(spec_path):
-            self.skipTest('Cookbook file not found')
-        with self.assertRaises(ValueError) as context:
-            run_workflow_locally(
-                spec_path=spec_path,
-                work_dir=self.work_dir,
-                keep_work_dir=True,
-            )
-        self.assertIn('Jinja', str(context.exception))
-
 
 # ============================================================================
 # run_workflow_locally() integration tests
@@ -1562,11 +1696,6 @@ def test_keep_flag_preserves_on_success(self):
         finally:
             os.unlink(spec_path)
 
-    def test_nonexistent_file_raises(self):
-        """Passing a non-existent spec file path raises FileNotFoundError."""
-        with self.assertRaises(FileNotFoundError):
-            run_workflow_locally(spec_path='/nonexistent/path/spec.yaml')
-
 
 if __name__ == '__main__':
     unittest.main()

From 684d554b007062f2a4182689d8f731180c2e5531 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 19:36:03 -0700
Subject: [PATCH 14/35] Implement file path validation in LocalExecutor to
 prevent directory traversal

- Enhanced the `LocalExecutor` class to validate file paths, ensuring they do not escape the task directory. This prevents potential security risks associated with directory traversal attacks.
- Added unit tests in `test_local_executor.py` to verify that invalid file paths raise appropriate exceptions, while valid paths are accepted without errors.
- Updated documentation in `AGENTS.md` to include the new local executor functionality for Docker-based workflow execution.
---
 AGENTS.md                              |  2 +
 src/utils/local_executor.py            |  7 ++-
 src/utils/tests/test_local_executor.py | 59 ++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 45b388253..3c8129e5a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -120,6 +120,7 @@ Entry point: `service/core/service.py`. Framework: FastAPI + Uvicorn + OpenTelem
 | `utils/job/` | `Task`, `FrontendJob`, `K8sObjectFactory`, `PodGroupTopologyBuilder` | Workflow execution framework. Task → K8s spec generation. Gang scheduling via PodGroup. Topology constraints. Backend job definitions. |
 | `utils/connectors/` | `ClusterConnector`, `PostgresConnector`, `RedisConnector` | K8s API wrapper, PostgreSQL operations, Redis job queue management. |
 | `utils/secret_manager/` | `SecretManager` | JWE-based secret encryption/decryption. MEK/UEK key management. |
+| `utils/local_executor.py` | `LocalExecutor`, `run_workflow_locally` | Local Docker-based workflow execution. Runs workflow specs without Kubernetes by mapping tasks to `docker run` commands with volume mounts for data flow. Supports DAG scheduling, resume (`--from-step`), and GPU passthrough. |
 | `utils/progress_check/` | — | Liveness/progress tracking for long-running services. |
 | `utils/metrics/` | — | Prometheus metrics collection and export. |
 
@@ -139,6 +140,7 @@ Entry point: `cli.py` → `main_parser.py` (argparse). Subcommand modules:
 | `login.py`                                                                                                     | Authentication                   |
 | `pool.py`, `resources.py`, `user.py`, `credential.py`, `access_token.py`, `bucket.py`, `task.py`, `version.py` | Supporting commands              |
 | `backend.py`                                                                                                   | Backend cluster management       |
+| `local.py`                                                                                                     | Local workflow execution via Docker (`osmo local run`) |
 
 Features: Tab completion (shtab), response formatting (`formatters.py`), spec editor (`editor.py`), PyInstaller packaging (`cli_builder.py`, `packaging/`).
 
diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index d5c8351d1..01c11b6de 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -358,7 +358,10 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
 
         for file_spec in task_spec.files:
             resolved_contents = self._substitute_tokens(file_spec.contents, token_map)
-            host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))
+            host_path = os.path.realpath(os.path.join(files_dir, file_spec.path.lstrip('/')))
+            if not host_path.startswith(os.path.realpath(files_dir) + os.sep):
+                raise ValueError(
+                    f'Task "{node.name}": file path "{file_spec.path}" escapes the task directory')
             os.makedirs(os.path.dirname(host_path), exist_ok=True)
             with open(host_path, 'w', encoding='utf-8') as f:
                 f.write(resolved_contents)
@@ -400,7 +403,7 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 docker_args += ['-v', f'{upstream_result.output_dir}:{input_mount}:ro']
 
         for file_spec in task_spec.files:
-            host_path = os.path.join(files_dir, file_spec.path.lstrip('/'))
+            host_path = os.path.realpath(os.path.join(files_dir, file_spec.path.lstrip('/')))
             docker_args += ['-v', f'{host_path}:{file_spec.path}:ro']
 
         if resolved_command:
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 25ccac7cb..7d3f19dd4 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -828,6 +828,65 @@ def test_unsupported_fields_rejected(self):
                 self.assertIn(case['expected_substring'], str(context.exception))
 
 
+class TestFilePathTraversal(unittest.TestCase):
+    """Verify that file paths cannot escape the task directory."""
+
+    def setUp(self):
+        """Create a temporary work directory."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-traversal-')
+
+    def tearDown(self):
+        """Remove the temporary work directory."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    @mock.patch('subprocess.run')
+    def test_path_traversal_rejected(self, mock_run):
+        """A file spec with a path that escapes the task directory raises ValueError."""
+        mock_run.return_value = mock.Mock(returncode=0)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: traversal
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+                files:
+                - contents: "malicious"
+                  path: /../../etc/evil.conf
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['task']
+        with self.assertRaises(ValueError) as context:
+            executor._run_task(node, spec)
+        self.assertIn('escapes the task directory', str(context.exception))
+
+    @mock.patch('subprocess.run')
+    def test_safe_nested_path_accepted(self, mock_run):
+        """A file spec with a safe nested path is accepted without error."""
+        mock_run.return_value = mock.Mock(returncode=0)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: safe
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+                files:
+                - contents: "safe"
+                  path: /tmp/scripts/run.sh
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['task']
+        executor._run_task(node, spec)
+        mock_run.assert_called_once()
+
+
 class TestShmSize(unittest.TestCase):
     """Verify that --shm-size is passed to Docker for GPU tasks."""
 

From 88bc29535a133d752f27bae3abd5c5529ee00589 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 20:15:00 -0700
Subject: [PATCH 15/35] Fix GPU device specification quoting in Docker
 arguments for LocalExecutor

- Updated the `LocalExecutor` class to stop embedding literal double quotes in the `--gpus` value passed to Docker (for example, `"device=0,1"` becomes `device=0,1`), ensuring the device specification is correctly formatted.
- Cleared previous results at the start of the `execute` method to prevent data carryover between executions.
---
 src/utils/local_executor.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 01c11b6de..729f2f5af 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -120,6 +120,7 @@ def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
     def execute(self, spec: workflow_module.WorkflowSpec,
                 resume: bool = False, from_step: str | None = None) -> bool:
         """Run all tasks in topological order, returning True if the entire workflow succeeds."""
+        self._results.clear()
         self._build_dag(spec)
         self._validate_for_local(spec)
         self._setup_directories()
@@ -382,9 +383,9 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 logger.warning(
                     'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
                     node.name, gpu_count, available, available)
-                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(available))}"']
+                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(available))}']
             else:
-                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
+                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(gpu_count))}']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
             shm_size = self._shm_size or self.DEFAULT_SHM_SIZE

From 93fe43c2c9f27cc9176dfb0e5366509ff07345cb Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 3 Apr 2026 20:34:53 -0700
Subject: [PATCH 16/35] Refactor shared memory size handling in LocalExecutor

- Updated the `LocalExecutor` class to ensure the `--shm-size` argument is included for CPU-only tasks when explicitly specified by the user.
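- Simplified the shared memory size logic for GPU tasks by passing `self._shm_size or self.DEFAULT_SHM_SIZE` directly instead of going through a temporary variable, to improve clarity and maintainability.
- Moved creation of the temporary work directory in `run_workflow_locally` so that it happens after the spec file has been read and the check that directs users to `--dry-run` expansion has run, rather than before.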
- Enhanced unit tests in `test_local_executor.py` to verify correct behavior for tasks with and without GPU resources regarding shared memory size.
---
 src/utils/local_executor.py            | 15 +++++++-------
 src/utils/tests/test_local_executor.py | 28 ++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 729f2f5af..da6d61f94 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -388,8 +388,9 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 docker_args += ['--gpus', f'device={",".join(str(i) for i in range(gpu_count))}']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
-            shm_size = self._shm_size or self.DEFAULT_SHM_SIZE
-            docker_args += ['--shm-size', shm_size]
+            docker_args += ['--shm-size', self._shm_size or self.DEFAULT_SHM_SIZE]
+        elif self._shm_size:
+            docker_args += ['--shm-size', self._shm_size]
 
         for key, value in task_spec.environment.items():
             resolved_value = self._substitute_tokens(value, token_map)
@@ -463,11 +464,6 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
         raise ValueError(
             '--resume and --from-step require --work-dir pointing to a previous run directory.')
 
-    created_work_dir = work_dir is None
-    if created_work_dir:
-        work_dir = tempfile.mkdtemp(prefix='osmo-local-')
-        logger.info('Using temporary work directory: %s', work_dir)
-
     with open(spec_path, encoding='utf-8') as f:
         spec_text = f.read()
 
@@ -478,6 +474,11 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
             'Run "osmo workflow submit --dry-run -f <spec>" first to get the expanded spec,\n'
             'then save that output and run it locally.')
 
+    created_work_dir = work_dir is None
+    if created_work_dir:
+        work_dir = tempfile.mkdtemp(prefix='osmo-local-')
+        logger.info('Using temporary work directory: %s', work_dir)
+
     executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
                               docker_cmd=docker_cmd, shm_size=shm_size)
     spec = executor.load_spec(spec_text)
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 7d3f19dd4..97226248d 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -955,8 +955,8 @@ def test_gpu_task_gets_custom_shm_size(self, mock_run):
         self.assertEqual(docker_call_args[shm_index + 1], '32g')
 
     @mock.patch('subprocess.run')
-    def test_non_gpu_task_has_no_shm_size(self, mock_run):
-        """A task without GPU resources does not include --shm-size in Docker args."""
+    def test_non_gpu_task_has_no_default_shm_size(self, mock_run):
+        """A CPU-only task without explicit shm_size does not include --shm-size."""
         mock_run.return_value = mock.Mock(returncode=0)
         spec_text = textwrap.dedent('''\
             workflow:
@@ -976,6 +976,30 @@ def test_non_gpu_task_has_no_shm_size(self, mock_run):
         docker_call_args = mock_run.call_args[0][0]
         self.assertNotIn('--shm-size', docker_call_args)
 
+    @mock.patch('subprocess.run')
+    def test_non_gpu_task_gets_explicit_shm_size(self, mock_run):
+        """A CPU-only task gets --shm-size when the user explicitly specifies it."""
+        mock_run.return_value = mock.Mock(returncode=0)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: no-gpu
+              tasks:
+              - name: preprocess
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='8g')
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._setup_directories()
+        node = executor._task_nodes['preprocess']
+        executor._run_task(node, spec)
+
+        docker_call_args = mock_run.call_args[0][0]
+        self.assertIn('--shm-size', docker_call_args)
+        shm_index = docker_call_args.index('--shm-size')
+        self.assertEqual(docker_call_args[shm_index + 1], '8g')
+
 
 class TestJinjaTemplateDetection(unittest.TestCase):
     """Verify that specs containing Jinja template markers are rejected before execution."""

From ed0d746b9b1402b23ad39cfc7caa2ea4b236762b Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 11:36:42 -0700
Subject: [PATCH 17/35] Refactor LocalExecutor to use container paths for token
 mapping

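- Updated the `LocalExecutor` class to replace host paths with container-side paths in the token mapping for output and input sources: `{{output}}` now resolves to `/osmo/data/output` and each task input to `/osmo/data/input/<index>`, both rooted at the new `CONTAINER_DATA_PATH` constant.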
- Adjusted the `_build_token_map` method to eliminate the output directory parameter, aligning with the new container path structure.
- Modified Docker argument construction to reflect the new paths, ensuring correct volume mounts for inputs and outputs.
- Enhanced unit tests in `test_local_executor.py` to validate the updated token mapping behavior and ensure proper handling of container paths.
---
 src/utils/local_executor.py            | 22 +++++++++++-----------
 src/utils/tests/test_local_executor.py | 16 ++++++++--------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index da6d61f94..a66a5bb9c 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -35,6 +35,7 @@
 logger = logging.getLogger(__name__)
 
 STATE_FILE_NAME = '.osmo-state.json'
+CONTAINER_DATA_PATH = '/osmo/data'
 
 
 @dataclasses.dataclass
@@ -355,7 +356,7 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
         files_dir = os.path.join(task_dir, 'files')
         os.makedirs(files_dir, exist_ok=True)
 
-        token_map = self._build_token_map(node, output_dir)
+        token_map = self._build_token_map(node)
 
         for file_spec in task_spec.files:
             resolved_contents = self._substitute_tokens(file_spec.contents, token_map)
@@ -396,13 +397,12 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             resolved_value = self._substitute_tokens(value, token_map)
             docker_args += ['-e', f'{key}={resolved_value}']
 
-        docker_args += ['-v', f'{output_dir}:{output_dir}']
+        docker_args += ['-v', f'{output_dir}:{CONTAINER_DATA_PATH}/output']
 
         for index, input_source in enumerate(task_spec.inputs):
             if isinstance(input_source, task_module.TaskInputOutput):
                 upstream_result = self._results[input_source.task]
-                input_mount = token_map.get(f'input:{index}', upstream_result.output_dir)
-                docker_args += ['-v', f'{upstream_result.output_dir}:{input_mount}:ro']
+                docker_args += ['-v', f'{upstream_result.output_dir}:{CONTAINER_DATA_PATH}/input/{index}:ro']
 
         for file_spec in task_spec.files:
             host_path = os.path.realpath(os.path.join(files_dir, file_spec.path.lstrip('/')))
@@ -434,16 +434,16 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             logger.error('Docker not found. Is Docker installed and in your PATH?')
             return TaskResult(name=node.name, exit_code=127, output_dir=output_dir)
 
-    def _build_token_map(self, node: TaskNode, output_dir: str) -> Dict[str, str]:
-        """Build a mapping of {{token}} keys to host paths for output and each upstream input."""
+    def _build_token_map(self, node: TaskNode) -> Dict[str, str]:
+        """Build a mapping of {{token}} keys to container-side paths matching on-cluster layout."""
         tokens: Dict[str, str] = {
-            'output': output_dir,
+            'output': f'{CONTAINER_DATA_PATH}/output',
         }
         for index, input_source in enumerate(node.spec.inputs):
             if isinstance(input_source, task_module.TaskInputOutput):
-                upstream_result = self._results[input_source.task]
-                tokens[f'input:{input_source.task}'] = upstream_result.output_dir
-                tokens[f'input:{index}'] = upstream_result.output_dir
+                container_input_path = f'{CONTAINER_DATA_PATH}/input/{index}'
+                tokens[f'input:{input_source.task}'] = container_input_path
+                tokens[f'input:{index}'] = container_input_path
         return tokens
 
     def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
@@ -475,7 +475,7 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
             'then save that output and run it locally.')
 
     created_work_dir = work_dir is None
-    if created_work_dir:
+    if work_dir is None:
         work_dir = tempfile.mkdtemp(prefix='osmo-local-')
         logger.info('Using temporary work directory: %s', work_dir)
 
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 97226248d..a4cb6b587 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -25,7 +25,7 @@
 from unittest import mock
 
 from src.utils.job import task as task_module
-from src.utils.local_executor import LocalExecutor, TaskNode, TaskResult, run_workflow_locally
+from src.utils.local_executor import CONTAINER_DATA_PATH, LocalExecutor, TaskNode, TaskResult, run_workflow_locally
 
 
 # ---------------------------------------------------------------------------
@@ -543,12 +543,12 @@ def test_output_only(self):
         executor._build_dag(spec)
 
         node = executor._task_nodes['task1']
-        tokens = executor._build_token_map(node, '/tmp/work/task1/output')
-        self.assertEqual(tokens['output'], '/tmp/work/task1/output')
+        tokens = executor._build_token_map(node)
+        self.assertEqual(tokens['output'], f'{CONTAINER_DATA_PATH}/output')
         self.assertEqual(len(tokens), 1)
 
     def test_with_upstream_inputs(self):
-        """A task with upstream inputs gets both index-based and name-based input tokens."""
+        """A task with upstream inputs gets both index-based and name-based input tokens pointing to container paths."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: serial
@@ -570,11 +570,11 @@ def test_with_upstream_inputs(self):
             name='producer', exit_code=0, output_dir='/tmp/work/producer/output')
 
         node = executor._task_nodes['consumer']
-        tokens = executor._build_token_map(node, '/tmp/work/consumer/output')
+        tokens = executor._build_token_map(node)
 
-        self.assertEqual(tokens['output'], '/tmp/work/consumer/output')
-        self.assertEqual(tokens['input:0'], '/tmp/work/producer/output')
-        self.assertEqual(tokens['input:producer'], '/tmp/work/producer/output')
+        self.assertEqual(tokens['output'], f'{CONTAINER_DATA_PATH}/output')
+        self.assertEqual(tokens['input:0'], f'{CONTAINER_DATA_PATH}/input/0')
+        self.assertEqual(tokens['input:producer'], f'{CONTAINER_DATA_PATH}/input/0')
 
 
 class TestValidateForLocal(unittest.TestCase):

From b1bf7924b0261f630edb1a715b484439bb57aa38 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 11:46:38 -0700
Subject: [PATCH 18/35] Add cycle detection in LocalExecutor for task DAGs

- Implemented a `_check_for_cycles` method in the `LocalExecutor` class to identify and raise a `ValueError` for circular dependencies in task DAGs.
- Added a new test class `TestCycleDetection` in `test_local_executor.py` to validate the detection of direct and indirect cycles, ensuring proper error reporting for various cyclic configurations.
- Enhanced existing tests to confirm that non-cyclic task structures are accepted without errors.
---
 src/utils/local_executor.py            |  31 ++++++
 src/utils/tests/test_local_executor.py | 146 +++++++++++++++++++++++++
 2 files changed, 177 insertions(+)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index a66a5bb9c..563935803 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -265,6 +265,37 @@ def _build_dag(self, spec: workflow_module.WorkflowSpec):
                         self._task_nodes[task_spec.name].upstream.add(upstream_task)
                         self._task_nodes[upstream_task].downstream.add(task_spec.name)
 
+        self._check_for_cycles()
+
+    def _check_for_cycles(self):
+        """Raise ValueError if the task DAG contains any cycles, reporting the cycle path."""
+        UNVISITED, IN_PROGRESS, DONE = 0, 1, 2
+        state: Dict[str, int] = {name: UNVISITED for name in self._task_nodes}
+        path: List[str] = []
+
+        def visit(name: str) -> List[str] | None:
+            if state[name] == DONE:
+                return None
+            if state[name] == IN_PROGRESS:
+                cycle_start = path.index(name)
+                return path[cycle_start:] + [name]
+
+            state[name] = IN_PROGRESS
+            path.append(name)
+            for downstream in self._task_nodes[name].downstream:
+                cycle = visit(downstream)
+                if cycle is not None:
+                    return cycle
+            path.pop()
+            state[name] = DONE
+            return None
+
+        for name in self._task_nodes:
+            cycle = visit(name)
+            if cycle is not None:
+                raise ValueError(
+                    f'Circular dependency detected: {" -> ".join(cycle)}')
+
     def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
         """Raise ValueError if the spec uses features unsupported in local mode (datasets, URLs, credentials, etc.)."""
         unsupported_features = []
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index a4cb6b587..81561f56c 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -371,6 +371,152 @@ def test_groups_with_cross_group_deps(self):
         self.assertEqual(executor._task_nodes['transform'].upstream, {'download'})
 
 
+class TestCycleDetection(unittest.TestCase):
+    """Verify that circular dependencies are detected and reported during DAG construction."""
+
+    def _make_executor(self) -> LocalExecutor:
+        """Create a LocalExecutor with a throwaway work directory for cycle-detection tests."""
+        return LocalExecutor(work_dir='/tmp/unused')
+
+    def test_direct_cycle_two_tasks(self):
+        """Two tasks that depend on each other form a direct cycle and are rejected."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: cycle
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: b
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: a
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor._build_dag(spec)
+        error_message = str(context.exception)
+        self.assertIn('Circular dependency', error_message)
+        self.assertIn('a', error_message)
+        self.assertIn('b', error_message)
+
+    def test_indirect_cycle_three_tasks(self):
+        """Three tasks forming a cycle (a -> b -> c -> a) are rejected."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: cycle
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: c
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: a
+              - name: c
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: b
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor._build_dag(spec)
+        self.assertIn('Circular dependency', str(context.exception))
+
+    def test_cycle_in_subgraph_with_valid_root(self):
+        """A cycle in a subgraph is detected even when other tasks have no cycle."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: partial-cycle
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["echo"]
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+                - task: b
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: a
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor._build_dag(spec)
+        error_message = str(context.exception)
+        self.assertIn('Circular dependency', error_message)
+        self.assertIn('a', error_message)
+        self.assertIn('b', error_message)
+
+    def test_no_cycle_linear_chain(self):
+        """A linear chain (a -> b -> c) has no cycle and is accepted."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: linear
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: a
+              - name: c
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: b
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+
+    def test_no_cycle_diamond(self):
+        """A diamond DAG (root -> left/right -> join) has no cycle and is accepted."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: diamond
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["echo"]
+              - name: left
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: right
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: join
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: left
+                - task: right
+        ''')
+        executor = self._make_executor()
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+
+
 class TestFindReadyTasks(unittest.TestCase):
     """Verify correct identification of tasks ready to execute."""
 

From 04615ca8ad0e143a9a0bb00eb8822b969e40f471 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 11:57:51 -0700
Subject: [PATCH 19/35] Enhance LocalExecutor with lead-task failure policy and
 host token support

- Added support for a lead-task failure policy in the `LocalExecutor`, allowing non-lead task failures to be ignored when `ignoreNonleadStatus` is set to true.
- Implemented checks to determine if a task's failure can be ignored based on its lead status and group configuration.
- Introduced validation in `_validate_for_local` that rejects tasks using `{{host:taskname}}` tokens, reporting them alongside the other unsupported features because they require parallel containers with shared networking.
- Expanded unit tests in `test_local_executor.py` to cover various scenarios for lead-task behavior and host token usage, ensuring robust functionality.
---
 src/utils/local_executor.py            |  65 +++++++--
 src/utils/tests/test_local_executor.py | 194 +++++++++++++++++++++++++
 2 files changed, 246 insertions(+), 13 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 563935803..832f8e4a1 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -63,7 +63,8 @@ class LocalExecutor:
     Executes an OSMO workflow spec locally using Docker, without Kubernetes.
 
     Supports:
-      - Serial and parallel task DAGs (groups flattened to individual tasks)
+      - Serial and parallel task DAGs
+      - Task groups with lead-task failure policy (ignoreNonleadStatus)
       - {{output}} and {{input:N}} / {{input:taskname}} token substitution
       - Inline `files:` written to the container
       - `environment:` passed as Docker env vars
@@ -74,6 +75,7 @@ class LocalExecutor:
       - Dataset / URL inputs/outputs (require object storage)
       - Credentials, checkpoints, volumeMounts (require cluster infra)
       - Templated specs with Jinja (require server-side expansion; use --dry-run first)
+      - {{host:taskname}} tokens (require parallel containers with shared networking)
     """
 
     DEFAULT_SHM_SIZE = '16g'
@@ -86,6 +88,7 @@ def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str =
         self._docker_cmd = docker_cmd
         self._shm_size = shm_size
         self._task_nodes: Dict[str, TaskNode] = {}
+        self._group_specs: Dict[str, task_module.TaskGroupSpec] = {}
         self._results: Dict[str, TaskResult] = {}
         self._available_gpus: int | None = None
 
@@ -149,11 +152,17 @@ def execute(self, spec: workflow_module.WorkflowSpec,
                 self._save_state()
 
                 if result.exit_code != 0:
-                    logger.error('Task "%s" failed with exit code %d', task_name, result.exit_code)
-                    self._cancel_downstream(task_name)
-                    return False
-
-                logger.info('Task "%s" completed successfully', task_name)
+                    if self._is_nonlead_failure_ignorable(task_name):
+                        logger.warning(
+                            'Non-lead task "%s" failed with exit code %d '
+                            '(ignored — group "%s" has ignoreNonleadStatus=true)',
+                            task_name, result.exit_code, node.group)
+                    else:
+                        logger.error('Task "%s" failed with exit code %d', task_name, result.exit_code)
+                        self._cancel_downstream(task_name)
+                        return False
+                else:
+                    logger.info('Task "%s" completed successfully', task_name)
 
             ready = self._find_ready_tasks()
 
@@ -163,9 +172,12 @@ def execute(self, spec: workflow_module.WorkflowSpec,
                          spec.name, ', '.join(sorted(unexecuted)))
             return False
 
-        failed = [name for name, r in self._results.items() if r.exit_code != 0]
-        if failed:
-            logger.error('Workflow failed. Failed tasks: %s', ', '.join(failed))
+        fatal_failures = [
+            name for name, r in self._results.items()
+            if r.exit_code != 0 and not self._is_nonlead_failure_ignorable(name)
+        ]
+        if fatal_failures:
+            logger.error('Workflow failed. Failed tasks: %s', ', '.join(fatal_failures))
             return False
 
         logger.info('Workflow "%s" completed successfully', spec.name)
@@ -243,11 +255,11 @@ def _groups(self, spec: workflow_module.WorkflowSpec) -> List[task_module.TaskGr
     def _build_dag(self, spec: workflow_module.WorkflowSpec):
         """Construct the internal DAG of TaskNodes from the workflow spec's tasks and input dependencies."""
         self._task_nodes.clear()
-        task_to_group: Dict[str, str] = {}
+        self._group_specs.clear()
 
         for group in self._groups(spec):
+            self._group_specs[group.name] = group
             for task_spec in group.tasks:
-                task_to_group[task_spec.name] = group.name
                 self._task_nodes[task_spec.name] = TaskNode(
                     name=task_spec.name,
                     spec=task_spec,
@@ -296,6 +308,8 @@ def visit(name: str) -> List[str] | None:
                 raise ValueError(
                     f'Circular dependency detected: {" -> ".join(cycle)}')
 
+    _HOST_TOKEN_PATTERN = re.compile(r'\{\{\s*host:[^}]+\}\}')
+
     def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
         """Raise ValueError if the spec uses features unsupported in local mode (datasets, URLs, credentials, etc.)."""
         unsupported_features = []
@@ -334,26 +348,51 @@ def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
                     unsupported_features.append(
                         f'Task "{task_spec.name}": hostNetwork is not supported in local mode')
 
+                if self._task_uses_host_tokens(task_spec):
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": {{{{host:taskname}}}} tokens require '
+                        f'parallel containers with shared networking')
+
         if unsupported_features:
             raise ValueError(
                 'The following features are not supported in local execution mode:\n  - '
                 + '\n  - '.join(unsupported_features))
 
+    def _task_uses_host_tokens(self, task_spec: task_module.TaskSpec) -> bool:
+        """Return True if any text field in the task spec contains {{host:...}} tokens."""
+        fields_to_check = list(task_spec.command) + list(task_spec.args)
+        fields_to_check += list(task_spec.environment.values())
+        fields_to_check += [file_spec.contents for file_spec in task_spec.files]
+        return any(self._HOST_TOKEN_PATTERN.search(field) for field in fields_to_check)
+
     def _setup_directories(self):
         """Create the work directory and per-task output directories on the host filesystem."""
         os.makedirs(self._work_dir, exist_ok=True)
         for task_name in self._task_nodes:
             os.makedirs(os.path.join(self._work_dir, task_name, 'output'), exist_ok=True)
 
+    def _is_nonlead_failure_ignorable(self, task_name: str) -> bool:
+        """Return True if the task is a non-lead task in a group with ignoreNonleadStatus=true."""
+        node = self._task_nodes[task_name]
+        group_spec = self._group_specs[node.group]
+        return group_spec.ignoreNonleadStatus and not node.spec.lead
+
+    def _is_task_satisfied(self, task_name: str) -> bool:
+        """Return True if a completed task's result counts as satisfied for downstream scheduling."""
+        result = self._results[task_name]
+        if result.exit_code == 0:
+            return True
+        return self._is_nonlead_failure_ignorable(task_name)
+
     def _find_ready_tasks(self) -> List[str]:
-        """Return tasks whose upstream dependencies have all completed successfully."""
+        """Return tasks whose upstream dependencies have all been satisfied, in spec declaration order."""
         completed = set(self._results.keys())
         ready = []
         for name, node in self._task_nodes.items():
             if name in completed:
                 continue
             if node.upstream.issubset(completed):
-                all_upstream_ok = all(self._results[u].exit_code == 0 for u in node.upstream)
+                all_upstream_ok = all(self._is_task_satisfied(u) for u in node.upstream)
                 if all_upstream_ok:
                     ready.append(name)
         return ready
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 81561f56c..5fb38bb06 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -960,6 +960,64 @@ class TestValidateForLocalRemainingBranches(unittest.TestCase):
             '''),
             'expected_substring': 'hostNetwork',
         },
+        'host_token_in_args': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  groups:
+                  - name: workers
+                    tasks:
+                    - name: leader
+                      lead: true
+                      image: ubuntu:24.04
+                      command: ["echo"]
+                      args: ["--peer={{host:follower}}"]
+                    - name: follower
+                      image: ubuntu:24.04
+                      command: ["echo"]
+            '''),
+            'expected_substring': 'host:taskname',
+        },
+        'host_token_in_env': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  groups:
+                  - name: workers
+                    tasks:
+                    - name: leader
+                      lead: true
+                      image: ubuntu:24.04
+                      command: ["echo"]
+                      environment:
+                        PEER_HOST: "{{ host:follower }}"
+                    - name: follower
+                      image: ubuntu:24.04
+                      command: ["echo"]
+            '''),
+            'expected_substring': 'host:taskname',
+        },
+        'host_token_in_files': {
+            'yaml': textwrap.dedent('''\
+                workflow:
+                  name: bad
+                  groups:
+                  - name: workers
+                    tasks:
+                    - name: leader
+                      lead: true
+                      image: ubuntu:24.04
+                      command: ["sh", "/tmp/run.sh"]
+                      files:
+                      - contents: |
+                          echo "connecting to {{host:follower}}"
+                        path: /tmp/run.sh
+                    - name: follower
+                      image: ubuntu:24.04
+                      command: ["echo"]
+            '''),
+            'expected_substring': 'host:taskname',
+        },
     }
 
     def test_unsupported_fields_rejected(self):
@@ -1033,6 +1091,142 @@ def test_safe_nested_path_accepted(self, mock_run):
         mock_run.assert_called_once()
 
 
+class TestLeadTaskFailurePolicy(unittest.TestCase):
+    """Verify ignoreNonleadStatus behavior: non-lead failures are tolerated when the flag is true."""
+
+    def setUp(self):
+        """Create a temporary work directory for lead-task policy tests."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-lead-')
+
+    def tearDown(self):
+        """Remove the temporary work directory after each test."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    @mock.patch('subprocess.run')
+    def test_nonlead_failure_ignored_when_flag_true(self, mock_run):
+        """With ignoreNonleadStatus=true (default), a non-lead failure does not abort the workflow."""
+        mock_run.side_effect = [
+            mock.Mock(returncode=0),
+            mock.Mock(returncode=1),
+        ]
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: lead-policy
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertTrue(executor.execute(spec))
+
+    @mock.patch('subprocess.run')
+    def test_lead_failure_aborts_workflow(self, mock_run):
+        """Even with ignoreNonleadStatus=true, a lead task failure aborts the workflow."""
+        mock_run.return_value = mock.Mock(returncode=1)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: lead-policy
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertFalse(executor.execute(spec))
+
+    @mock.patch('subprocess.run')
+    def test_nonlead_failure_aborts_when_flag_false(self, mock_run):
+        """With ignoreNonleadStatus=false, a non-lead failure aborts the workflow."""
+        mock_run.side_effect = [
+            mock.Mock(returncode=0),
+            mock.Mock(returncode=1),
+        ]
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: lead-policy
+              groups:
+              - name: workers
+                ignoreNonleadStatus: false
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertFalse(executor.execute(spec))
+
+    @mock.patch('subprocess.run')
+    def test_nonlead_failure_does_not_block_downstream_group(self, mock_run):
+        """A tolerated non-lead failure does not prevent a downstream group from running."""
+        mock_run.side_effect = [
+            mock.Mock(returncode=0),
+            mock.Mock(returncode=1),
+            mock.Mock(returncode=0),
+        ]
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: downstream-after-nonlead-fail
+              groups:
+              - name: first
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+              - name: second
+                tasks:
+                - name: consumer
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+                  inputs:
+                  - task: leader
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertTrue(executor.execute(spec))
+        self.assertEqual(mock_run.call_count, 3)
+
+    @mock.patch('subprocess.run')
+    def test_single_task_group_failure_aborts(self, mock_run):
+        """A single-task group (auto-promoted to lead) aborts on failure like normal."""
+        mock_run.return_value = mock.Mock(returncode=1)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: single-fail
+              tasks:
+              - name: only-task
+                image: alpine:3.18
+                command: ["sh", "-c", "exit 1"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertFalse(executor.execute(spec))
+
+
 class TestShmSize(unittest.TestCase):
     """Verify that --shm-size is passed to Docker for GPU tasks."""
 

From 3c88f8ff35069c1c7674a642c82ed151cd1dc2ce Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 12:37:18 -0700
Subject: [PATCH 20/35] Enhance LocalExecutor with unresolved token detection

- Added functionality to detect unresolved {{variable}} tokens in command, args, environment values, and file contents before executing tasks in the `LocalExecutor`.
- Implemented a `_check_unresolved_tokens` method to raise a ValueError if unresolved tokens are found, providing a helpful error message suggesting the use of a dry-run for template expansion.
- Introduced a new test class `TestUnresolvedTokenDetection` in `test_local_executor.py` to validate the detection of unresolved tokens across various scenarios, ensuring robust error handling.
---
 src/utils/local_executor.py            |  26 ++++-
 src/utils/tests/test_local_executor.py | 140 +++++++++++++++++++++++++
 2 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/src/utils/local_executor.py b/src/utils/local_executor.py
index 832f8e4a1..c516a46fb 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/local_executor.py
@@ -440,6 +440,11 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
 
         resolved_command = [self._substitute_tokens(c, token_map) for c in task_spec.command]
         resolved_args = [self._substitute_tokens(a, token_map) for a in task_spec.args]
+        resolved_env_values = [self._substitute_tokens(v, token_map) for v in task_spec.environment.values()]
+
+        all_resolved = resolved_command + resolved_args + resolved_env_values
+        all_resolved += [self._substitute_tokens(f.contents, token_map) for f in task_spec.files]
+        self._check_unresolved_tokens(node.name, all_resolved)
 
         docker_args = [self._docker_cmd, 'run', '--rm']
 
@@ -463,9 +468,8 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
         elif self._shm_size:
             docker_args += ['--shm-size', self._shm_size]
 
-        for key, value in task_spec.environment.items():
-            resolved_value = self._substitute_tokens(value, token_map)
-            docker_args += ['-e', f'{key}={resolved_value}']
+        for env_key, resolved_value in zip(task_spec.environment.keys(), resolved_env_values):
+            docker_args += ['-e', f'{env_key}={resolved_value}']
 
         docker_args += ['-v', f'{output_dir}:{CONTAINER_DATA_PATH}/output']
 
@@ -516,12 +520,28 @@ def _build_token_map(self, node: TaskNode) -> Dict[str, str]:
                 tokens[f'input:{index}'] = container_input_path
         return tokens
 
+    _UNRESOLVED_TOKEN_PATTERN = re.compile(r'\{\{[^}]+\}\}')
+
     def _substitute_tokens(self, text: str, tokens: Dict[str, str]) -> str:
         """Replace all {{key}} placeholders in text with their corresponding token values."""
         for key, value in tokens.items():
             text = re.sub(r'\{\{\s*' + re.escape(key) + r'\s*\}\}', value, text)
         return text
 
+    def _check_unresolved_tokens(self, task_name: str, resolved_fields: List[str]):
+        """Raise ValueError if any resolved field still contains {{ }} placeholders."""
+        unresolved: List[str] = []
+        for field in resolved_fields:
+            for match in self._UNRESOLVED_TOKEN_PATTERN.finditer(field):
+                token = match.group(0)
+                if token not in unresolved:
+                    unresolved.append(token)
+        if unresolved:
+            raise ValueError(
+                f'Task "{task_name}" has unresolved token(s): {", ".join(unresolved)}. '
+                f'If this spec uses Jinja templates, run "osmo workflow submit --dry-run -f <spec>" '
+                f'first to expand them.')
+
 
 def run_workflow_locally(spec_path: str, work_dir: str | None = None,
                          keep_work_dir: bool = False,
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_local_executor.py
index 5fb38bb06..3c234c0dc 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_local_executor.py
@@ -1227,6 +1227,146 @@ def test_single_task_group_failure_aborts(self, mock_run):
         self.assertFalse(executor.execute(spec))
 
 
+class TestUnresolvedTokenDetection(unittest.TestCase):
+    """Verify that unresolved {{variable}} tokens are detected before running containers."""
+
+    def setUp(self):
+        """Create a temporary work directory for unresolved token tests."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-tokens-')
+
+    def tearDown(self):
+        """Remove the temporary work directory after each test."""
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_jinja_variable_in_args_detected(self):
+        """A bare {{variable}} in args (without default-values section) is caught before execution."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: jinja-leak
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["echo"]
+                args: ["{{experiment_name}}"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('unresolved', str(context.exception).lower())
+        self.assertIn('experiment_name', str(context.exception))
+
+    def test_jinja_variable_in_command_detected(self):
+        """A bare {{variable}} in command is caught before execution."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: jinja-leak
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["{{my_binary}}"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('my_binary', str(context.exception))
+
+    def test_jinja_variable_in_env_detected(self):
+        """A bare {{variable}} in environment values is caught before execution."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: jinja-leak
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["echo"]
+                environment:
+                  MY_VAR: "{{some_value}}"
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('some_value', str(context.exception))
+
+    def test_jinja_variable_in_file_contents_detected(self):
+        """A bare {{variable}} in file contents is caught before execution."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: jinja-leak
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: |
+                    echo {{config_path}}/data
+                  path: /tmp/run.sh
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('config_path', str(context.exception))
+
+    def test_typo_in_osmo_token_detected(self):
+        """A typo in an OSMO token (e.g., {{ouptut}}) is caught as unresolved."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: typo
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["sh", "-c"]
+                args: ["echo data > {{ouptut}}/file.txt"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('ouptut', str(context.exception))
+
+    @mock.patch('subprocess.run')
+    def test_valid_osmo_tokens_not_flagged(self, mock_run):
+        """Valid OSMO tokens ({{output}}, {{input:0}}) are resolved and not flagged as unresolved."""
+        mock_run.return_value = mock.Mock(returncode=0)
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: valid
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo ok > {{output}}/data.txt"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/data.txt > {{ output }}/result.txt"]
+                inputs:
+                - task: producer
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        executor.execute(spec)
+
+    def test_error_message_suggests_dry_run(self):
+        """The unresolved token error message suggests using --dry-run to expand templates."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: helpful
+              tasks:
+              - name: task
+                image: "alpine:3.18"
+                command: ["echo", "{{missing}}"]
+        ''')
+        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('dry-run', str(context.exception))
+
+
 class TestShmSize(unittest.TestCase):
     """Verify that --shm-size is passed to Docker for GPU tasks."""
 

From 38a9e27cc46db4ca1d1c71b0ffa69bddeed360df Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 12:53:52 -0700
Subject: [PATCH 21/35] Refactor LocalExecutor to StandaloneExecutor for
 Docker-based execution

- Renamed `LocalExecutor` to `StandaloneExecutor` (and `run_workflow_locally` to `run_workflow_standalone`); it continues to provide Docker-based workflow execution without Kubernetes.
- Updated relevant documentation in `AGENTS.md` to reflect the new executor's capabilities and entry points.
- Renamed the `local` CLI subcommand to `standalone` (`osmo local run` becomes `osmo standalone run`) and updated the parser registration in `main_parser.py` accordingly.
- Adjusted build configurations to include the new `standalone_executor` and updated test references accordingly.
- Renamed the test module `test_local_executor.py` to `test_standalone_executor.py` and updated its comprehensive tests to target `StandaloneExecutor`, ensuring functionality and validating workflow execution scenarios.
---
 AGENTS.md                                     |   4 +-
 src/cli/BUILD                                 |   4 +-
 src/cli/main_parser.py                        |   4 +-
 src/cli/{local.py => standalone.py}           |  22 +-
 src/utils/BUILD                               |   4 +-
 ...cal_executor.py => standalone_executor.py} |  38 ++--
 src/utils/tests/BUILD                         |   6 +-
 ...xecutor.py => test_standalone_executor.py} | 208 +++++++++---------
 8 files changed, 144 insertions(+), 146 deletions(-)
 rename src/cli/{local.py => standalone.py} (80%)
 rename src/utils/{local_executor.py => standalone_executor.py} (95%)
 rename src/utils/tests/{test_local_executor.py => test_standalone_executor.py} (92%)

diff --git a/AGENTS.md b/AGENTS.md
index 3c8129e5a..90a401f2b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -120,7 +120,7 @@ Entry point: `service/core/service.py`. Framework: FastAPI + Uvicorn + OpenTelem
 | `utils/job/` | `Task`, `FrontendJob`, `K8sObjectFactory`, `PodGroupTopologyBuilder` | Workflow execution framework. Task → K8s spec generation. Gang scheduling via PodGroup. Topology constraints. Backend job definitions. |
 | `utils/connectors/` | `ClusterConnector`, `PostgresConnector`, `RedisConnector` | K8s API wrapper, PostgreSQL operations, Redis job queue management. |
 | `utils/secret_manager/` | `SecretManager` | JWE-based secret encryption/decryption. MEK/UEK key management. |
-| `utils/local_executor.py` | `LocalExecutor`, `run_workflow_locally` | Local Docker-based workflow execution. Runs workflow specs without Kubernetes by mapping tasks to `docker run` commands with volume mounts for data flow. Supports DAG scheduling, resume (`--from-step`), and GPU passthrough. |
+| `utils/standalone_executor.py` | `StandaloneExecutor`, `run_workflow_standalone` | Standalone Docker-based workflow execution. Runs workflow specs without Kubernetes by mapping tasks to `docker run` commands with volume mounts for data flow. Supports DAG scheduling, resume (`--from-step`), and GPU passthrough. |
 | `utils/progress_check/` | — | Liveness/progress tracking for long-running services. |
 | `utils/metrics/` | — | Prometheus metrics collection and export. |
 
@@ -140,7 +140,7 @@ Entry point: `cli.py` → `main_parser.py` (argparse). Subcommand modules:
 | `login.py`                                                                                                     | Authentication                   |
 | `pool.py`, `resources.py`, `user.py`, `credential.py`, `access_token.py`, `bucket.py`, `task.py`, `version.py` | Supporting commands              |
 | `backend.py`                                                                                                   | Backend cluster management       |
-| `local.py`                                                                                                     | Local workflow execution via Docker (`osmo local run`) |
+| `standalone.py`                                                                                                | Standalone workflow execution via Docker (`osmo standalone run`) |
 
 Features: Tab completion (shtab), response formatting (`formatters.py`), spec editor (`editor.py`), PyInstaller packaging (`cli_builder.py`, `packaging/`).
 
diff --git a/src/cli/BUILD b/src/cli/BUILD
index cdada591a..eade2ea71 100755
--- a/src/cli/BUILD
+++ b/src/cli/BUILD
@@ -37,7 +37,7 @@ osmo_py_library(
         "dataset.py",
         "editor.py",
         "formatters.py",
-        "local.py",
+        "standalone.py",
         "login.py",
         "main_parser.py",
         "pool.py",
@@ -74,7 +74,7 @@ osmo_py_library(
         "//src/lib/utils:validation",
         "//src/lib/utils:version",
         "//src/lib/utils:workflow",
-        "//src/utils:local_executor",
+        "//src/utils:standalone_executor",
     ],
 )
 
diff --git a/src/cli/main_parser.py b/src/cli/main_parser.py
index bd097111d..59dfc0043 100644
--- a/src/cli/main_parser.py
+++ b/src/cli/main_parser.py
@@ -28,7 +28,7 @@
     credential,
     data,
     dataset,
-    local,
+    standalone,
     login,
     pool,
     profile,
@@ -57,7 +57,7 @@
     pool.setup_parser,
     user.setup_parser,
     config.setup_parser,
-    local.setup_parser,
+    standalone.setup_parser,
 )
 
 
diff --git a/src/cli/local.py b/src/cli/standalone.py
similarity index 80%
rename from src/cli/local.py
rename to src/cli/standalone.py
index 67eef4ca4..d30e764ff 100644
--- a/src/cli/local.py
+++ b/src/cli/standalone.py
@@ -21,20 +21,20 @@
 
 import shtab
 
-from src.utils import local_executor
+from src.utils import standalone_executor
 
 
 def setup_parser(parser: argparse._SubParsersAction):
-    """Register the 'local' subcommand and its nested 'run' action with the CLI argument parser."""
-    local_parser = parser.add_parser(
-        'local',
-        help='Run workflows locally using Docker (no Kubernetes cluster required).')
-    subparsers = local_parser.add_subparsers(dest='command')
+    """Register the 'standalone' subcommand and its nested 'run' action with the CLI argument parser."""
+    standalone_parser = parser.add_parser(
+        'standalone',
+        help='Run workflows in standalone mode using Docker containers (no Kubernetes cluster required).')
+    subparsers = standalone_parser.add_subparsers(dest='command')
     subparsers.required = True
 
     run_parser = subparsers.add_parser(
         'run',
-        help='Execute a workflow spec locally using Docker containers.')
+        help='Execute a workflow spec in standalone mode using Docker containers.')
     run_parser.add_argument(
         '-f', '--file',
         required=True,
@@ -75,13 +75,13 @@ def setup_parser(parser: argparse._SubParsersAction):
         help='Shared memory size for GPU containers (e.g. 16g, 32g). '
              'Defaults to 16g for tasks that request GPUs. '
              'PyTorch DataLoader workers require large shared memory.')
-    run_parser.set_defaults(func=_run_local)
+    run_parser.set_defaults(func=_run_standalone)
 
 
-def _run_local(service_client, args: argparse.Namespace):
-    """Execute a workflow locally via Docker using the parsed CLI arguments."""
+def _run_standalone(service_client, args: argparse.Namespace):
+    """Execute a workflow in standalone mode via Docker using the parsed CLI arguments."""
     try:
-        success = local_executor.run_workflow_locally(
+        success = standalone_executor.run_workflow_standalone(
             spec_path=args.workflow_file,
             work_dir=args.work_dir,
             keep_work_dir=args.keep,
diff --git a/src/utils/BUILD b/src/utils/BUILD
index 8a29aa9af..8dbf59a94 100644
--- a/src/utils/BUILD
+++ b/src/utils/BUILD
@@ -128,8 +128,8 @@ osmo_py_library(
 )
 
 osmo_py_library(
-    name = "local_executor",
-    srcs = ["local_executor.py"],
+    name = "standalone_executor",
+    srcs = ["standalone_executor.py"],
     deps = [
         requirement("pyyaml"),
         "//src/utils/job",
diff --git a/src/utils/local_executor.py b/src/utils/standalone_executor.py
similarity index 95%
rename from src/utils/local_executor.py
rename to src/utils/standalone_executor.py
index c516a46fb..8f4493140 100644
--- a/src/utils/local_executor.py
+++ b/src/utils/standalone_executor.py
@@ -58,9 +58,9 @@ class TaskResult:
     output_dir: str
 
 
-class LocalExecutor:
+class StandaloneExecutor:
     """
-    Executes an OSMO workflow spec locally using Docker, without Kubernetes.
+    Executes an OSMO workflow spec in standalone mode using Docker, without Kubernetes.
 
     Supports:
       - Serial and parallel task DAGs
@@ -126,7 +126,7 @@ def execute(self, spec: workflow_module.WorkflowSpec,
         """Run all tasks in topological order, returning True if the entire workflow succeeds."""
         self._results.clear()
         self._build_dag(spec)
-        self._validate_for_local(spec)
+        self._validate_for_standalone(spec)
         self._setup_directories()
 
         if resume or from_step:
@@ -310,8 +310,8 @@ def visit(name: str) -> List[str] | None:
 
     _HOST_TOKEN_PATTERN = re.compile(r'\{\{\s*host:[^}]+\}\}')
 
-    def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
-        """Raise ValueError if the spec uses features unsupported in local mode (datasets, URLs, credentials, etc.)."""
+    def _validate_for_standalone(self, spec: workflow_module.WorkflowSpec):
+        """Raise ValueError if the spec uses features unsupported in standalone mode (datasets, URLs, credentials, etc.)."""
         unsupported_features = []
         for group in self._groups(spec):
             for task_spec in group.tasks:
@@ -342,11 +342,11 @@ def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
 
                 if task_spec.privileged:
                     unsupported_features.append(
-                        f'Task "{task_spec.name}": privileged containers are not supported in local mode')
+                        f'Task "{task_spec.name}": privileged containers are not supported in standalone mode')
 
                 if task_spec.hostNetwork:
                     unsupported_features.append(
-                        f'Task "{task_spec.name}": hostNetwork is not supported in local mode')
+                        f'Task "{task_spec.name}": hostNetwork is not supported in standalone mode')
 
                 if self._task_uses_host_tokens(task_spec):
                     unsupported_features.append(
@@ -355,7 +355,7 @@ def _validate_for_local(self, spec: workflow_module.WorkflowSpec):
 
         if unsupported_features:
             raise ValueError(
-                'The following features are not supported in local execution mode:\n  - '
+                'The following features are not supported in standalone execution mode:\n  - '
                 + '\n  - '.join(unsupported_features))
 
     def _task_uses_host_tokens(self, task_spec: task_module.TaskSpec) -> bool:
@@ -543,13 +543,13 @@ def _check_unresolved_tokens(self, task_name: str, resolved_fields: List[str]):
                 f'first to expand them.')
 
 
-def run_workflow_locally(spec_path: str, work_dir: str | None = None,
-                         keep_work_dir: bool = False,
-                         resume: bool = False,
-                         from_step: str | None = None,
-                         docker_cmd: str = 'docker',
-                         shm_size: str | None = None) -> bool:
-    """Load a workflow spec from disk and execute it locally via Docker, managing the work directory lifecycle."""
+def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
+                            keep_work_dir: bool = False,
+                            resume: bool = False,
+                            from_step: str | None = None,
+                            docker_cmd: str = 'docker',
+                            shm_size: str | None = None) -> bool:
+    """Load a workflow spec from disk and execute it in standalone mode via Docker, managing the work directory lifecycle."""
     if (resume or from_step) and work_dir is None:
         raise ValueError(
             '--resume and --from-step require --work-dir pointing to a previous run directory.')
@@ -562,15 +562,15 @@ def run_workflow_locally(spec_path: str, work_dir: str | None = None,
         raise ValueError(
             'This spec uses Jinja templates which require server-side expansion.\n'
             'Run "osmo workflow submit --dry-run -f <spec>" first to get the expanded spec,\n'
-            'then save that output and run it locally.')
+            'then save that output and run it standalone.')
 
     created_work_dir = work_dir is None
     if work_dir is None:
-        work_dir = tempfile.mkdtemp(prefix='osmo-local-')
+        work_dir = tempfile.mkdtemp(prefix='osmo-standalone-')
         logger.info('Using temporary work directory: %s', work_dir)
 
-    executor = LocalExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                              docker_cmd=docker_cmd, shm_size=shm_size)
+    executor = StandaloneExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                                   docker_cmd=docker_cmd, shm_size=shm_size)
     spec = executor.load_spec(spec_text)
     success = executor.execute(spec, resume=resume or from_step is not None,
                                from_step=from_step)
diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD
index efe72682f..b555f85bc 100644
--- a/src/utils/tests/BUILD
+++ b/src/utils/tests/BUILD
@@ -56,10 +56,10 @@ osmo_py_test(
 )
 
 py_test(
-    name = "test_local_executor",
-    srcs = ["test_local_executor.py"],
+    name = "test_standalone_executor",
+    srcs = ["test_standalone_executor.py"],
     deps = [
-        "//src/utils:local_executor",
+        "//src/utils:standalone_executor",
     ],
     data = [
         "//cookbook/tutorials:tutorial_specs",
diff --git a/src/utils/tests/test_local_executor.py b/src/utils/tests/test_standalone_executor.py
similarity index 92%
rename from src/utils/tests/test_local_executor.py
rename to src/utils/tests/test_standalone_executor.py
index 3c234c0dc..2eae253f6 100644
--- a/src/utils/tests/test_local_executor.py
+++ b/src/utils/tests/test_standalone_executor.py
@@ -25,7 +25,7 @@
 from unittest import mock
 
 from src.utils.job import task as task_module
-from src.utils.local_executor import CONTAINER_DATA_PATH, LocalExecutor, TaskNode, TaskResult, run_workflow_locally
+from src.utils.standalone_executor import CONTAINER_DATA_PATH, StandaloneExecutor, TaskNode, TaskResult, run_workflow_standalone
 
 
 # ---------------------------------------------------------------------------
@@ -65,7 +65,7 @@ def test_single_task_spec(self):
                 command: ["echo"]
                 args: ["Hello from OSMO!"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(spec.name, 'hello-osmo')
         self.assertEqual(len(spec.tasks), 1)
@@ -98,7 +98,7 @@ def test_serial_tasks_spec(self):
                 inputs:
                 - task: task1
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(spec.name, 'serial-tasks')
         self.assertEqual(len(spec.tasks), 2)
@@ -123,7 +123,7 @@ def test_groups_spec(self):
                   image: ubuntu:24.04
                   command: ["echo", "follower"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(len(spec.groups), 1)
         self.assertEqual(len(spec.groups[0].tasks), 2)
@@ -140,7 +140,7 @@ def test_versioned_spec(self):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(spec.name, 'versioned')
 
@@ -155,7 +155,7 @@ def test_invalid_version_rejected(self):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
@@ -175,7 +175,7 @@ def test_both_tasks_and_groups_rejected(self):
                   image: alpine:3.18
                   command: ["echo"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
@@ -185,7 +185,7 @@ def test_empty_workflow_rejected(self):
             workflow:
               name: empty
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         with self.assertRaises(ValueError):
             executor.load_spec(spec_text)
 
@@ -204,7 +204,7 @@ def test_resources_spec_parsed(self):
                 image: ubuntu:24.04
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(spec.resources['default'].cpu, 2)
         self.assertEqual(spec.resources['default'].memory, '4Gi')
@@ -222,7 +222,7 @@ def test_environment_parsed(self):
                   MY_VAR: hello
                   ANOTHER: world
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         self.assertEqual(spec.tasks[0].environment['MY_VAR'], 'hello')
         self.assertEqual(spec.tasks[0].environment['ANOTHER'], 'world')
@@ -231,9 +231,9 @@ def test_environment_parsed(self):
 class TestBuildDag(unittest.TestCase):
     """Verify DAG construction from task dependencies."""
 
-    def _make_executor(self) -> LocalExecutor:
-        """Create a LocalExecutor with a throwaway work directory for DAG-only tests."""
-        return LocalExecutor(work_dir='/tmp/unused')
+    def _make_executor(self) -> StandaloneExecutor:
+        """Create a StandaloneExecutor with a throwaway work directory for DAG-only tests."""
+        return StandaloneExecutor(work_dir='/tmp/unused')
 
     def test_no_dependencies(self):
         """All tasks with no input dependencies have empty upstream and downstream sets."""
@@ -374,9 +374,9 @@ def test_groups_with_cross_group_deps(self):
 class TestCycleDetection(unittest.TestCase):
     """Verify that circular dependencies are detected and reported during DAG construction."""
 
-    def _make_executor(self) -> LocalExecutor:
-        """Create a LocalExecutor with a throwaway work directory for cycle-detection tests."""
-        return LocalExecutor(work_dir='/tmp/unused')
+    def _make_executor(self) -> StandaloneExecutor:
+        """Create a StandaloneExecutor with a throwaway work directory for cycle-detection tests."""
+        return StandaloneExecutor(work_dir='/tmp/unused')
 
     def test_direct_cycle_two_tasks(self):
         """Two tasks that depend on each other form a direct cycle and are rejected."""
@@ -533,7 +533,7 @@ def test_all_root_tasks_ready(self):
                 image: alpine:3.18
                 command: ["echo"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -555,7 +555,7 @@ def test_dependent_not_ready_until_upstream_completes(self):
                 inputs:
                 - task: first
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -581,7 +581,7 @@ def test_failed_upstream_blocks_downstream(self):
                 inputs:
                 - task: first
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -613,7 +613,7 @@ def test_cascading_cancel(self):
                 inputs:
                 - task: b
         ''')
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -631,42 +631,42 @@ class TestSubstituteTokens(unittest.TestCase):
 
     def test_output_token(self):
         """The {{output}} token is replaced with the task output directory path."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/work/task1/output'}
         result = executor._substitute_tokens('echo data > {{output}}/file.txt', tokens)
         self.assertEqual(result, 'echo data > /work/task1/output/file.txt')
 
     def test_input_by_index(self):
         """The {{input:N}} token is replaced with the Nth upstream output directory."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         tokens = {'input:0': '/work/upstream/output'}
         result = executor._substitute_tokens('cat {{input:0}}/data.csv', tokens)
         self.assertEqual(result, 'cat /work/upstream/output/data.csv')
 
     def test_input_by_name(self):
         """The {{input:taskname}} token is replaced with the named task's output directory."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         tokens = {'input:task1': '/work/task1/output'}
         result = executor._substitute_tokens('cat {{ input:task1 }}/data.csv', tokens)
         self.assertEqual(result, 'cat /work/task1/output/data.csv')
 
     def test_whitespace_around_tokens(self):
         """Whitespace inside {{ token }} braces is tolerated during substitution."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/out'}
         result = executor._substitute_tokens('{{ output }}/file.txt', tokens)
         self.assertEqual(result, '/out/file.txt')
 
     def test_multiple_tokens_in_one_string(self):
         """Multiple distinct tokens in the same string are all replaced."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         tokens = {'output': '/out', 'input:0': '/in0'}
         result = executor._substitute_tokens('cp {{input:0}}/src {{output}}/dst', tokens)
         self.assertEqual(result, 'cp /in0/src /out/dst')
 
     def test_no_tokens_unchanged(self):
         """Text without any token placeholders passes through unchanged."""
-        executor = LocalExecutor(work_dir='/tmp/unused')
+        executor = StandaloneExecutor(work_dir='/tmp/unused')
         result = executor._substitute_tokens('plain text no tokens', {})
         self.assertEqual(result, 'plain text no tokens')
 
@@ -684,7 +684,7 @@ def test_output_only(self):
                 image: alpine:3.18
                 command: ["echo"]
         ''')
-        executor = LocalExecutor(work_dir='/tmp/work')
+        executor = StandaloneExecutor(work_dir='/tmp/work')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -708,7 +708,7 @@ def test_with_upstream_inputs(self):
                 inputs:
                 - task: producer
         ''')
-        executor = LocalExecutor(work_dir='/tmp/work')
+        executor = StandaloneExecutor(work_dir='/tmp/work')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
 
@@ -723,15 +723,15 @@ def test_with_upstream_inputs(self):
         self.assertEqual(tokens['input:producer'], f'{CONTAINER_DATA_PATH}/input/0')
 
 
-class TestValidateForLocal(unittest.TestCase):
+class TestValidateForStandalone(unittest.TestCase):
     """Verify that unsupported features are detected and rejected."""
 
-    def _make_executor(self) -> LocalExecutor:
-        """Create a LocalExecutor with a throwaway work directory for validation-only tests."""
-        return LocalExecutor(work_dir='/tmp/unused')
+    def _make_executor(self) -> StandaloneExecutor:
+        """Create a StandaloneExecutor with a throwaway work directory for validation-only tests."""
+        return StandaloneExecutor(work_dir='/tmp/unused')
 
     def test_simple_spec_passes(self):
-        """A spec using only task-to-task inputs passes local validation."""
+        """A spec using only task-to-task inputs passes standalone validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -743,10 +743,10 @@ def test_simple_spec_passes(self):
         executor = self._make_executor()
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
-        executor._validate_for_local(spec)
+        executor._validate_for_standalone(spec)
 
     def test_dataset_input_rejected(self):
-        """A spec with dataset inputs is rejected as unsupported in local mode."""
+        """A spec with dataset inputs is rejected as unsupported in standalone mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -762,11 +762,11 @@ def test_dataset_input_rejected(self):
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
-            executor._validate_for_local(spec)
+            executor._validate_for_standalone(spec)
         self.assertIn('dataset', str(context.exception))
 
     def test_url_input_rejected(self):
-        """A spec with URL inputs is rejected as unsupported in local mode."""
+        """A spec with URL inputs is rejected as unsupported in standalone mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -781,11 +781,11 @@ def test_url_input_rejected(self):
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
-            executor._validate_for_local(spec)
+            executor._validate_for_standalone(spec)
         self.assertIn('URL', str(context.exception))
 
     def test_dataset_output_rejected(self):
-        """A spec with dataset outputs is rejected as unsupported in local mode."""
+        """A spec with dataset outputs is rejected as unsupported in standalone mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -801,11 +801,11 @@ def test_dataset_output_rejected(self):
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
-            executor._validate_for_local(spec)
+            executor._validate_for_standalone(spec)
         self.assertIn('dataset', str(context.exception).lower())
 
     def test_url_output_rejected(self):
-        """A spec with URL outputs is rejected as unsupported in local mode."""
+        """A spec with URL outputs is rejected as unsupported in standalone mode."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -820,7 +820,7 @@ def test_url_output_rejected(self):
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
-            executor._validate_for_local(spec)
+            executor._validate_for_standalone(spec)
         self.assertIn('object storage', str(context.exception).lower())
 
     def test_multiple_unsupported_features_all_reported(self):
@@ -845,13 +845,13 @@ def test_multiple_unsupported_features_all_reported(self):
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
-            executor._validate_for_local(spec)
+            executor._validate_for_standalone(spec)
         error_message = str(context.exception)
         self.assertIn('task1', error_message)
         self.assertIn('task2', error_message)
 
     def test_task_deps_only_passes(self):
-        """A spec with only task-to-task dependencies passes local validation."""
+        """A spec with only task-to-task dependencies passes standalone validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -868,10 +868,10 @@ def test_task_deps_only_passes(self):
         executor = self._make_executor()
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
-        executor._validate_for_local(spec)
+        executor._validate_for_standalone(spec)
 
     def test_files_and_env_pass(self):
-        """A spec using files and environment variables passes local validation."""
+        """A spec using files and environment variables passes standalone validation."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: ok
@@ -888,11 +888,11 @@ def test_files_and_env_pass(self):
         executor = self._make_executor()
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
-        executor._validate_for_local(spec)
+        executor._validate_for_standalone(spec)
 
 
-class TestValidateForLocalRemainingBranches(unittest.TestCase):
-    """Verify that _validate_for_local rejects credentials, checkpoint, volumeMounts, privileged, and hostNetwork."""
+class TestValidateForStandaloneRemainingBranches(unittest.TestCase):
+    """Verify that _validate_for_standalone rejects credentials, checkpoint, volumeMounts, privileged, and hostNetwork."""
 
     _UNSUPPORTED_SPECS = {
         'credentials': {
@@ -1024,11 +1024,11 @@ def test_unsupported_fields_rejected(self):
         """Each unsupported task-level field is detected and rejected with a descriptive error."""
         for feature, case in self._UNSUPPORTED_SPECS.items():
             with self.subTest(feature=feature):
-                executor = LocalExecutor(work_dir='/tmp/unused')
+                executor = StandaloneExecutor(work_dir='/tmp/unused')
                 spec = executor.load_spec(case['yaml'])
                 executor._build_dag(spec)
                 with self.assertRaises(ValueError) as context:
-                    executor._validate_for_local(spec)
+                    executor._validate_for_standalone(spec)
                 self.assertIn(case['expected_substring'], str(context.exception))
 
 
@@ -1037,7 +1037,7 @@ class TestFilePathTraversal(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-traversal-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-traversal-')
 
     def tearDown(self):
         """Remove the temporary work directory."""
@@ -1058,7 +1058,7 @@ def test_path_traversal_rejected(self, mock_run):
                 - contents: "malicious"
                   path: /../../etc/evil.conf
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1082,7 +1082,7 @@ def test_safe_nested_path_accepted(self, mock_run):
                 - contents: "safe"
                   path: /tmp/scripts/run.sh
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1096,7 +1096,7 @@ class TestLeadTaskFailurePolicy(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory for lead-task policy tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-lead-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-lead-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
@@ -1123,7 +1123,7 @@ def test_nonlead_failure_ignored_when_flag_true(self, mock_run):
                   image: alpine:3.18
                   command: ["sh", "-c", "exit 1"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         self.assertTrue(executor.execute(spec))
 
@@ -1145,7 +1145,7 @@ def test_lead_failure_aborts_workflow(self, mock_run):
                   image: alpine:3.18
                   command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         self.assertFalse(executor.execute(spec))
 
@@ -1171,7 +1171,7 @@ def test_nonlead_failure_aborts_when_flag_false(self, mock_run):
                   image: alpine:3.18
                   command: ["sh", "-c", "exit 1"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         self.assertFalse(executor.execute(spec))
 
@@ -1205,7 +1205,7 @@ def test_nonlead_failure_does_not_block_downstream_group(self, mock_run):
                   inputs:
                   - task: leader
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         self.assertTrue(executor.execute(spec))
         self.assertEqual(mock_run.call_count, 3)
@@ -1222,7 +1222,7 @@ def test_single_task_group_failure_aborts(self, mock_run):
                 image: alpine:3.18
                 command: ["sh", "-c", "exit 1"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         self.assertFalse(executor.execute(spec))
 
@@ -1232,7 +1232,7 @@ class TestUnresolvedTokenDetection(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory for unresolved token tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-tokens-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-tokens-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
@@ -1249,7 +1249,7 @@ def test_jinja_variable_in_args_detected(self):
                 command: ["echo"]
                 args: ["{{experiment_name}}"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1266,7 +1266,7 @@ def test_jinja_variable_in_command_detected(self):
                 image: "alpine:3.18"
                 command: ["{{my_binary}}"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1284,7 +1284,7 @@ def test_jinja_variable_in_env_detected(self):
                 environment:
                   MY_VAR: "{{some_value}}"
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1304,7 +1304,7 @@ def test_jinja_variable_in_file_contents_detected(self):
                     echo {{config_path}}/data
                   path: /tmp/run.sh
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1321,7 +1321,7 @@ def test_typo_in_osmo_token_detected(self):
                 command: ["sh", "-c"]
                 args: ["echo data > {{ouptut}}/file.txt"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1346,7 +1346,7 @@ def test_valid_osmo_tokens_not_flagged(self, mock_run):
                 inputs:
                 - task: producer
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         executor.execute(spec)
 
@@ -1360,7 +1360,7 @@ def test_error_message_suggests_dry_run(self):
                 image: "alpine:3.18"
                 command: ["echo", "{{missing}}"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
@@ -1372,7 +1372,7 @@ class TestShmSize(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory for shm-size tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-shm-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-shm-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
@@ -1394,7 +1394,7 @@ def test_gpu_task_gets_default_shm_size(self, mock_run):
                 resource: gpu-resource
                 command: ["python", "train.py"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1422,7 +1422,7 @@ def test_gpu_task_gets_custom_shm_size(self, mock_run):
                 resource: gpu-resource
                 command: ["python", "train.py"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='32g')
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='32g')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1446,7 +1446,7 @@ def test_non_gpu_task_has_no_default_shm_size(self, mock_run):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1468,7 +1468,7 @@ def test_non_gpu_task_gets_explicit_shm_size(self, mock_run):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='8g')
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True, shm_size='8g')
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
         executor._setup_directories()
@@ -1504,7 +1504,7 @@ def test_jinja_block_detected(self):
         '''))
         try:
             with self.assertRaises(ValueError) as context:
-                run_workflow_locally(path)
+                run_workflow_standalone(path)
             self.assertIn('Jinja', str(context.exception))
         finally:
             os.unlink(path)
@@ -1522,7 +1522,7 @@ def test_jinja_comment_detected(self):
         '''))
         try:
             with self.assertRaises(ValueError) as context:
-                run_workflow_locally(path)
+                run_workflow_standalone(path)
             self.assertIn('Jinja', str(context.exception))
         finally:
             os.unlink(path)
@@ -1541,7 +1541,7 @@ def test_default_values_section_detected(self):
         '''))
         try:
             with self.assertRaises(ValueError) as context:
-                run_workflow_locally(path)
+                run_workflow_standalone(path)
             self.assertIn('Jinja', str(context.exception))
         finally:
             os.unlink(path)
@@ -1555,7 +1555,7 @@ class TestDockerNotFoundHandling(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-test-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-test-')
 
     def tearDown(self):
         """Remove the temporary work directory."""
@@ -1571,7 +1571,7 @@ def test_docker_not_found_graceful_failure(self):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(
+        executor = StandaloneExecutor(
             work_dir=self.work_dir,
             keep_work_dir=True,
             docker_cmd='nonexistent-docker-binary-12345',
@@ -1591,18 +1591,18 @@ class TestCookbookSpecValidation(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory for cookbook validation tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-cookbook-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-cookbook-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
     def _run_cookbook_spec(self, filename: str) -> bool:
-        """Execute a cookbook tutorial spec file through the local executor."""
+        """Execute a cookbook tutorial spec file through the standalone executor."""
         spec_path = os.path.join(self.COOKBOOK_DIR, filename)
         self.assertTrue(os.path.exists(spec_path),
                         f'Cookbook file not found: {spec_path}')
-        return run_workflow_locally(
+        return run_workflow_standalone(
             spec_path=spec_path,
             work_dir=self.work_dir,
             keep_work_dir=True,
@@ -1632,7 +1632,7 @@ def test_unsupported_spec_template(self):
         self.assertTrue(os.path.exists(spec_path),
                         f'Cookbook file not found: {spec_path}')
         with self.assertRaises(ValueError) as context:
-            run_workflow_locally(
+            run_workflow_standalone(
                 spec_path=spec_path,
                 work_dir=self.work_dir,
                 keep_work_dir=True,
@@ -1640,13 +1640,13 @@ def test_unsupported_spec_template(self):
         self.assertIn('Jinja', str(context.exception))
 
 
-class TestRunWorkflowLocallyErrors(unittest.TestCase):
-    """Test error handling in run_workflow_locally() that does not require Docker."""
+class TestRunWorkflowStandaloneErrors(unittest.TestCase):
+    """Test error handling in run_workflow_standalone() that does not require Docker."""
 
     def test_nonexistent_file_raises(self):
         """Passing a non-existent spec file path raises FileNotFoundError."""
         with self.assertRaises(FileNotFoundError):
-            run_workflow_locally(spec_path='/nonexistent/path/spec.yaml')
+            run_workflow_standalone(spec_path='/nonexistent/path/spec.yaml')
 
 
 # ============================================================================
@@ -1655,13 +1655,13 @@ def test_nonexistent_file_raises(self):
 @unittest.skipUnless(DOCKER_AVAILABLE, SKIP_DOCKER_MSG)
 class TestDockerExecution(unittest.TestCase):
     """
-    Integration tests that run real OSMO workflow specs through the local executor
+    Integration tests that run real OSMO workflow specs through the standalone executor
     using Docker. Each test uses a spec that would normally run on a Kubernetes cluster.
     """
 
     def setUp(self):
         """Create a temporary work directory for each Docker execution test."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-test-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-test-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
@@ -1669,7 +1669,7 @@ def tearDown(self):
 
     def _execute_spec(self, spec_text: str) -> bool:
         """Parse and execute a workflow spec string, returning the success status."""
-        executor = LocalExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        executor = StandaloneExecutor(work_dir=self.work_dir, keep_work_dir=True)
         spec = executor.load_spec(spec_text)
         return executor.execute(spec)
 
@@ -1961,8 +1961,6 @@ def test_parallel_failure_does_not_affect_independent_branch(self):
                 - task: root
         ''')
         result = self._execute_spec(spec_text)
-        # The executor should stop on first failure, so the overall result is False.
-        # root succeeds, then one of the branches fails.
         self.assertFalse(result)
 
     # ---- Groups (ganged tasks) tests ----
@@ -2078,7 +2076,7 @@ def test_file_contents_with_token_substitution(self):
     # ---- Resource spec ignored gracefully ----
 
     def test_resources_ignored_gracefully(self):
-        """Resource specs are K8s-specific; local executor should accept and ignore them."""
+        """Resource specs are K8s-specific; standalone executor should accept and ignore them."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: with-resources
@@ -2106,7 +2104,7 @@ def test_custom_docker_command(self):
                 image: alpine:3.18
                 command: ["echo", "ok"]
         ''')
-        executor = LocalExecutor(
+        executor = StandaloneExecutor(
             work_dir=self.work_dir,
             keep_work_dir=True,
             docker_cmd='docker',
@@ -2122,7 +2120,7 @@ def test_custom_docker_command(self):
 class TestCookbookSpecs(unittest.TestCase):
     """
     Run real OSMO cookbook YAML specs that are designed for Kubernetes clusters,
-    and verify they execute successfully in the local Docker executor.
+    and verify they execute successfully in the standalone Docker executor.
     """
 
     COOKBOOK_DIR = os.path.join(os.path.dirname(__file__), '..', '..', '..',
@@ -2130,18 +2128,18 @@ class TestCookbookSpecs(unittest.TestCase):
 
     def setUp(self):
         """Create a temporary work directory for cookbook spec tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-cookbook-')
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-cookbook-')
 
     def tearDown(self):
         """Remove the temporary work directory after each cookbook test."""
         shutil.rmtree(self.work_dir, ignore_errors=True)
 
     def _run_cookbook_spec(self, filename: str) -> bool:
-        """Execute a cookbook tutorial spec file through the local executor."""
+        """Execute a cookbook tutorial spec file through the standalone executor."""
         spec_path = os.path.join(self.COOKBOOK_DIR, filename)
         self.assertTrue(os.path.exists(spec_path),
                         f'Cookbook file not found: {spec_path}')
-        return run_workflow_locally(
+        return run_workflow_standalone(
             spec_path=spec_path,
             work_dir=self.work_dir,
             keep_work_dir=True,
@@ -2173,15 +2171,15 @@ def test_combination_workflow_simple_yaml(self):
 
 
 # ============================================================================
-# run_workflow_locally() integration tests
+# run_workflow_standalone() integration tests
 # ============================================================================
 @unittest.skipUnless(DOCKER_AVAILABLE, SKIP_DOCKER_MSG)
-class TestRunWorkflowLocally(unittest.TestCase):
-    """Test the top-level run_workflow_locally() convenience function."""
+class TestRunWorkflowStandalone(unittest.TestCase):
+    """Test the top-level run_workflow_standalone() convenience function."""
 
     def setUp(self):
-        """Create a temporary work directory for run_workflow_locally tests."""
-        self.work_dir = tempfile.mkdtemp(prefix='osmo-local-func-')
+        """Create a temporary work directory for run_workflow_standalone tests."""
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-standalone-func-')
 
     def tearDown(self):
         """Remove the temporary work directory after each test."""
@@ -2189,7 +2187,7 @@ def tearDown(self):
 
     def test_caller_supplied_work_dir_preserved_on_success(self):
         """A caller-supplied work_dir is never deleted, even with keep_work_dir=False."""
-        work_dir = tempfile.mkdtemp(prefix='osmo-local-cleanup-')
+        work_dir = tempfile.mkdtemp(prefix='osmo-standalone-cleanup-')
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
             f.write(textwrap.dedent('''\
                 workflow:
@@ -2201,7 +2199,7 @@ def test_caller_supplied_work_dir_preserved_on_success(self):
             '''))
             spec_path = f.name
         try:
-            result = run_workflow_locally(
+            result = run_workflow_standalone(
                 spec_path=spec_path,
                 work_dir=work_dir,
                 keep_work_dir=False,
@@ -2226,7 +2224,7 @@ def test_failure_preserves_work_dir(self):
             '''))
             spec_path = f.name
         try:
-            result = run_workflow_locally(
+            result = run_workflow_standalone(
                 spec_path=spec_path,
                 work_dir=self.work_dir,
                 keep_work_dir=False,
@@ -2249,7 +2247,7 @@ def test_keep_flag_preserves_on_success(self):
             '''))
             spec_path = f.name
         try:
-            result = run_workflow_locally(
+            result = run_workflow_standalone(
                 spec_path=spec_path,
                 work_dir=self.work_dir,
                 keep_work_dir=True,

From 1b705ed6fecc2ad619c97a65ea4b453b555dd1eb Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 9 Apr 2026 14:11:19 -0700
Subject: [PATCH 22/35] Add Docker Compose support for parallel workflow
 execution

- Introduced `ComposeExecutor` to enable Docker Compose-based parallel execution of workflows, extending the functionality of `StandaloneExecutor`.
- Updated `AGENTS.md` to document the new `ComposeExecutor` and its capabilities, including wave-parallel scheduling and GPU support.
- Added a new `docker_compose.py` CLI subcommand for executing workflows using Docker Compose.
- Implemented tests for `ComposeExecutor` to validate functionality and ensure correct generation of Docker Compose files.
- Adjusted build configurations to include the new `compose_executor` library and corresponding tests.
---
 AGENTS.md                                |    2 +
 src/cli/docker_compose.py                |   85 ++
 src/cli/main_parser.py                   |    2 +
 src/utils/BUILD                          |   11 +
 src/utils/compose_executor.py            |  527 +++++++++++
 src/utils/tests/BUILD                    |   10 +
 src/utils/tests/test_compose_executor.py | 1073 ++++++++++++++++++++++
 7 files changed, 1710 insertions(+)
 create mode 100644 src/cli/docker_compose.py
 create mode 100644 src/utils/compose_executor.py
 create mode 100644 src/utils/tests/test_compose_executor.py

diff --git a/AGENTS.md b/AGENTS.md
index 90a401f2b..67e102378 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -121,6 +121,7 @@ Entry point: `service/core/service.py`. Framework: FastAPI + Uvicorn + OpenTelem
 | `utils/connectors/` | `ClusterConnector`, `PostgresConnector`, `RedisConnector` | K8s API wrapper, PostgreSQL operations, Redis job queue management. |
 | `utils/secret_manager/` | `SecretManager` | JWE-based secret encryption/decryption. MEK/UEK key management. |
 | `utils/standalone_executor.py` | `StandaloneExecutor`, `run_workflow_standalone` | Standalone Docker-based workflow execution. Runs workflow specs without Kubernetes by mapping tasks to `docker run` commands with volume mounts for data flow. Supports DAG scheduling, resume (`--from-step`), and GPU passthrough. |
+| `utils/compose_executor.py` | `ComposeExecutor`, `run_workflow_compose` | Docker Compose-based parallel workflow execution. Extends StandaloneExecutor with wave-parallel scheduling, `{{host:taskname}}` DNS resolution via shared Compose networks, and GPU support via deploy resource reservations. |
 | `utils/progress_check/` | — | Liveness/progress tracking for long-running services. |
 | `utils/metrics/` | — | Prometheus metrics collection and export. |
 
@@ -141,6 +142,7 @@ Entry point: `cli.py` → `main_parser.py` (argparse). Subcommand modules:
 | `pool.py`, `resources.py`, `user.py`, `credential.py`, `access_token.py`, `bucket.py`, `task.py`, `version.py` | Supporting commands              |
 | `backend.py`                                                                                                   | Backend cluster management       |
 | `standalone.py`                                                                                                | Standalone workflow execution via Docker (`osmo standalone run`) |
+| `docker_compose.py`                                                                                            | Parallel workflow execution via Docker Compose (`osmo docker-compose run`) |
 
 Features: Tab completion (shtab), response formatting (`formatters.py`), spec editor (`editor.py`), PyInstaller packaging (`cli_builder.py`, `packaging/`).
 
diff --git a/src/cli/docker_compose.py b/src/cli/docker_compose.py
new file mode 100644
index 000000000..ab44e4f62
--- /dev/null
+++ b/src/cli/docker_compose.py
@@ -0,0 +1,85 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import argparse
+import sys
+
+import shtab
+
+from src.utils import compose_executor
+
+
+def setup_parser(parser: argparse._SubParsersAction):
+    """Register 'docker-compose' and its required 'run' subcommand, dispatched to _run_compose."""
+    dc_parser = parser.add_parser(
+        'docker-compose',
+        help='Run workflows using Docker Compose for parallel execution '
+             '(no Kubernetes cluster required).')
+    subparsers = dc_parser.add_subparsers(dest='command')
+    subparsers.required = True
+
+    run_parser = subparsers.add_parser(
+        'run',
+        help='Execute a workflow spec using Docker Compose for parallel task execution.')
+    run_parser.add_argument(
+        '-f', '--file',
+        required=True,
+        dest='workflow_file',
+        help='Path to the workflow YAML spec file.').complete = shtab.FILE
+    run_parser.add_argument(
+        '--work-dir',
+        dest='work_dir',
+        default=None,
+        help='Directory for task inputs/outputs and the generated docker-compose.yml. '
+             'Defaults to a temporary directory.')
+    run_parser.add_argument(
+        '--keep',
+        action='store_true',
+        default=False,
+        help='Keep the work directory after execution (always kept on failure).')
+    run_parser.add_argument(
+        '--compose-cmd',
+        dest='compose_cmd',
+        default='docker compose',
+        help='Docker Compose command to use (e.g. "docker-compose" for V1). '
+             'Default: "docker compose".')
+    run_parser.add_argument(
+        '--shm-size',
+        dest='shm_size',
+        default=None,
+        help='Shared memory size for GPU containers (e.g. 16g, 32g). '
+             'Defaults to 16g for tasks that request GPUs.')
+    run_parser.set_defaults(func=_run_compose)
+
+
+def _run_compose(service_client, args: argparse.Namespace):  # service_client is not used
+    """Run the workflow via Docker Compose; exit with status 1 on a handled error or failure."""
+    try:
+        success = compose_executor.run_workflow_compose(
+            spec_path=args.workflow_file,
+            work_dir=args.work_dir,
+            keep_work_dir=args.keep,
+            compose_cmd=args.compose_cmd,
+            shm_size=args.shm_size,
+        )
+    except (ValueError, FileNotFoundError, PermissionError) as error:
+        print(f'Error: {error}', file=sys.stderr)
+        sys.exit(1)
+
+    if not success:
+        sys.exit(1)
diff --git a/src/cli/main_parser.py b/src/cli/main_parser.py
index 59dfc0043..654673923 100644
--- a/src/cli/main_parser.py
+++ b/src/cli/main_parser.py
@@ -28,6 +28,7 @@
     credential,
     data,
     dataset,
+    docker_compose,
     standalone,
     login,
     pool,
@@ -58,6 +59,7 @@
     user.setup_parser,
     config.setup_parser,
     standalone.setup_parser,
+    docker_compose.setup_parser,
 )
 
 
diff --git a/src/utils/BUILD b/src/utils/BUILD
index 8dbf59a94..3e5eaae19 100644
--- a/src/utils/BUILD
+++ b/src/utils/BUILD
@@ -136,3 +136,14 @@ osmo_py_library(
     ],
     visibility = ["//visibility:public"],
 )
+
+osmo_py_library(
+    name = "compose_executor",
+    srcs = ["compose_executor.py"],
+    deps = [
+        requirement("pyyaml"),
+        "//src/utils:standalone_executor",
+        "//src/utils/job",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
new file mode 100644
index 000000000..928ddcada
--- /dev/null
+++ b/src/utils/compose_executor.py
@@ -0,0 +1,527 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+from typing import Dict, List
+
+import yaml
+
+from src.utils.job import task as task_module
+from src.utils.job import workflow as workflow_module
+from src.utils.standalone_executor import (
+    CONTAINER_DATA_PATH,
+    StandaloneExecutor,
+    TaskNode,
+    TaskResult,
+)
+
+
+logger = logging.getLogger(__name__)
+
+COMPOSE_FILE_NAME = 'docker-compose.yml'
+
+
+class ComposeExecutor(StandaloneExecutor):
+    """
+    Executes an OSMO workflow spec using Docker Compose for parallel task execution.
+
+    Extends StandaloneExecutor with:
+      - True parallel execution of independent tasks within each scheduling wave
+      - {{host:taskname}} token support via Docker Compose DNS
+      - Shared network per task group for gang-scheduled communication
+      - GPU passthrough via compose deploy.resources.reservations
+
+    Execution model:
+      Generates a single docker-compose.yml with all services defined up-front,
+      then executes them in waves.  Each wave contains all tasks whose upstream
+      dependencies are satisfied.  Tasks within a wave run in parallel via
+      ``docker compose up``.  Group co-scheduling is enforced so that all members
+      of a multi-task group start together in the same wave.
+    """
+
+    def __init__(self, work_dir: str, keep_work_dir: bool = False,
+                 compose_cmd: str = 'docker compose', shm_size: str | None = None):
+        super().__init__(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                         docker_cmd='docker', shm_size=shm_size)
+        self._compose_cmd = compose_cmd
+
+    @property
+    def _compose_file_path(self) -> str:
+        return os.path.join(self._work_dir, COMPOSE_FILE_NAME)
+
+    def _compose_project_name(self, spec: workflow_module.WorkflowSpec) -> str:
+        return f'osmo-{re.sub(r"[^a-z0-9-]", "-", spec.name.lower())}'
+
+    def _compose_base_cmd(self, spec: workflow_module.WorkflowSpec) -> List[str]:
+        """Return the argv prefix: compose command words, then ``-p <project> -f <file>``."""
+        compose_argv = self._compose_cmd.split()
+        project_name = self._compose_project_name(spec)
+        return compose_argv + ['-p', project_name, '-f', self._compose_file_path]
+
+    # ------------------------------------------------------------------
+    # Execution
+    # ------------------------------------------------------------------
+
+    def execute(self, spec: workflow_module.WorkflowSpec,
+                resume: bool = False, from_step: str | None = None) -> bool:
+        """Run all tasks in wave-parallel order via Docker Compose; resume/from_step are unused."""
+        self._results.clear()
+        self._build_dag(spec)
+        self._validate_for_compose(spec)
+        self._setup_directories()
+        self._write_inline_files(spec)
+        self._generate_compose_file(spec)
+
+        total_tasks = sum(len(g.tasks) for g in self._groups(spec))
+        logger.info('Workflow "%s": %d task(s) across %d group(s) [docker-compose mode]',
+                     spec.name, total_tasks, len(self._groups(spec)))
+
+        try:
+            wave_number = 0
+            while True:
+                wave = self._find_ready_wave()
+                if not wave:
+                    break
+
+                wave_number += 1
+                logger.info('=== Wave %d: %s ===', wave_number, ', '.join(wave))
+
+                wave_results = self._run_wave(wave, spec)
+
+                fatal_failure = False
+                for task_name, exit_code in wave_results.items():
+                    output_dir = os.path.join(self._work_dir, task_name, 'output')
+                    self._results[task_name] = TaskResult(
+                        name=task_name, exit_code=exit_code, output_dir=output_dir)
+
+                    if exit_code != 0:
+                        if self._is_nonlead_failure_ignorable(task_name):
+                            logger.warning(
+                                'Non-lead task "%s" failed with exit code %d '
+                                '(ignored — group "%s" has ignoreNonleadStatus=true)',
+                                task_name, exit_code, self._task_nodes[task_name].group)
+                        else:
+                            logger.error('Task "%s" failed with exit code %d',
+                                         task_name, exit_code)
+                            self._cancel_downstream(task_name)
+                            fatal_failure = True
+                    else:
+                        logger.info('Task "%s" completed successfully', task_name)
+
+                if fatal_failure:  # checked only after every result in the wave is recorded
+                    return False
+
+            unexecuted = set(self._task_nodes.keys()) - set(self._results.keys())
+            if unexecuted:
+                logger.error(
+                    'Workflow "%s" stalled — tasks could not be scheduled '
+                    '(possible cycle or unsatisfiable group): %s',
+                    spec.name, ', '.join(sorted(unexecuted)))
+                return False
+
+            fatal_failures = [
+                name for name, result in self._results.items()
+                if result.exit_code != 0
+                and not self._is_nonlead_failure_ignorable(name)
+            ]
+            if fatal_failures:
+                logger.error('Workflow failed. Failed tasks: %s',
+                             ', '.join(fatal_failures))
+                return False
+
+            logger.info('Workflow "%s" completed successfully', spec.name)
+            return True
+        finally:  # runs on every exit path, including the early returns above
+            self._compose_cleanup(spec)
+
+    # ------------------------------------------------------------------
+    # Validation
+    # ------------------------------------------------------------------
+
+    def _validate_for_compose(self, spec: workflow_module.WorkflowSpec):
+        """Raise ValueError listing cluster-only features in use; {{host:}} tokens are allowed."""
+        unsupported_features: List[str] = []
+        for group in self._groups(spec):
+            for task_spec in group.tasks:
+                for input_source in task_spec.inputs:
+                    if isinstance(input_source, task_module.DatasetInputOutput):
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": dataset inputs require object storage')
+                    elif isinstance(input_source, task_module.URLInputOutput):
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": URL inputs require network/storage access')
+
+                for output in task_spec.outputs:
+                    if isinstance(output, (task_module.DatasetInputOutput,
+                                           task_module.URLInputOutput)):
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": dataset/URL outputs require object storage')
+
+                if task_spec.credentials:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": credentials require the OSMO secret manager')
+
+                if task_spec.checkpoint:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": checkpoints require object storage')
+
+                if task_spec.volumeMounts:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": volumeMounts require cluster-level host paths')
+
+                if task_spec.privileged:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": privileged containers are not '
+                        f'supported in docker-compose mode')
+
+                if task_spec.hostNetwork:
+                    unsupported_features.append(
+                        f'Task "{task_spec.name}": hostNetwork is not supported '
+                        f'in docker-compose mode')
+
+                self._validate_host_tokens(task_spec, group)
+
+        if unsupported_features:
+            raise ValueError(
+                'The following features are not supported in docker-compose '
+                'execution mode:\n  - '
+                + '\n  - '.join(unsupported_features))
+
+    # Non-greedy name: "{{host:a}},{{host:b}}" must yield "a" and "b", not one merged match.
+    _HOST_TOKEN_NAME_PATTERN = re.compile(r'\{\{\s*host:(\S+?)\s*\}\}')
+
+    def _validate_host_tokens(self, task_spec: task_module.TaskSpec,
+                              group: task_module.TaskGroupSpec):
+        """Raise ValueError if a {{host:taskname}} token names a task outside ``group``."""
+        group_task_names = {t.name for t in group.tasks}
+        fields_to_check = list(task_spec.command) + list(task_spec.args)
+        fields_to_check += list(task_spec.environment.values())
+        fields_to_check += [file_spec.contents for file_spec in task_spec.files]
+        for field in fields_to_check:
+            for match in self._HOST_TOKEN_NAME_PATTERN.finditer(field):
+                referenced_task = match.group(1)
+                if referenced_task not in group_task_names:
+                    raise ValueError(
+                        f'Task "{task_spec.name}": {{{{host:{referenced_task}}}}} '
+                        f'references a task outside its group "{group.name}". '
+                        f'Host tokens can only reference tasks within the same group.')
+
+    # ------------------------------------------------------------------
+    # Token map (extended with {{host:taskname}})
+    # ------------------------------------------------------------------
+
+    def _build_token_map(self, node: TaskNode) -> Dict[str, str]:
+        """Add ``host:<task>`` -> ``<task>`` for every task in the node's group to the base map."""
+        tokens = super()._build_token_map(node)
+        for peer in self._group_specs[node.group].tasks:
+            tokens[f'host:{peer.name}'] = peer.name
+        return tokens
+
+    # ------------------------------------------------------------------
+    # Inline files
+    # ------------------------------------------------------------------
+
+    def _write_inline_files(self, spec: workflow_module.WorkflowSpec):
+        """Materialize each task's inline files under ``<work_dir>/<task>/files``."""
+        for group in self._groups(spec):
+            for task_spec in group.tasks:
+                token_map = self._build_token_map(self._task_nodes[task_spec.name])
+                files_dir = os.path.join(self._work_dir, task_spec.name, 'files')
+                os.makedirs(files_dir, exist_ok=True)
+                # Written files must resolve to a path strictly inside this directory.
+                allowed_prefix = os.path.realpath(files_dir) + os.sep
+
+                for file_spec in task_spec.files:
+                    contents = self._substitute_tokens(file_spec.contents, token_map)
+                    relative_path = file_spec.path.lstrip('/')
+                    host_path = os.path.realpath(os.path.join(files_dir, relative_path))
+                    if not host_path.startswith(allowed_prefix):
+                        raise ValueError(
+                            f'Task "{task_spec.name}": file path '
+                            f'"{file_spec.path}" escapes the task directory')
+                    os.makedirs(os.path.dirname(host_path), exist_ok=True)
+                    with open(host_path, 'w', encoding='utf-8') as f:
+                        f.write(contents)
+
+    # ------------------------------------------------------------------
+    # Compose file generation
+    # ------------------------------------------------------------------
+
+    def _generate_compose_file(self, spec: workflow_module.WorkflowSpec):
+        """Serialize one compose service per task, plus one bridge network per group."""
+        services: Dict = {}
+        group_names: set = set()
+
+        for task_name, node in self._task_nodes.items():
+            services[task_name] = self._build_compose_service(node, spec)
+            group_names.add(node.group)
+
+        # 'services' is inserted first and sort_keys=False keeps it ahead of 'networks'.
+        compose: Dict = {'services': services}
+        if group_names:
+            compose['networks'] = {
+                name: {'driver': 'bridge'} for name in sorted(group_names)
+            }
+
+        with open(self._compose_file_path, 'w', encoding='utf-8') as f:
+            yaml.safe_dump(compose, f, default_flow_style=False, sort_keys=False)
+
+        logger.info('Generated compose file: %s', self._compose_file_path)
+
+    @staticmethod
+    def _escape_compose_interpolation(text: str) -> str:
+        """Double every ``$`` so Compose's variable interpolation leaves the text unchanged."""
+        return '$$'.join(text.split('$'))
+
+    def _build_compose_service(self, node: TaskNode,
+                               spec: workflow_module.WorkflowSpec) -> Dict:
+        """
+        Translate one task into its Docker Compose service mapping.
+
+        Tokens are substituted in the command, args, environment values and
+        inline file contents, and the results are checked for unresolved
+        tokens.  The first command element becomes ``entrypoint``; the rest of
+        the command followed by the args becomes ``command``.  Every ``$`` in
+        those strings and in the environment values is doubled for Compose.
+
+        Returns the service dict: always ``image``, ``volumes`` and ``networks``,
+        plus ``entrypoint``/``command``/``environment``/``deploy``/``shm_size``
+        when the task calls for them.
+        """
+        task_spec = node.spec
+        tokens = self._build_token_map(node)
+        escape = self._escape_compose_interpolation
+
+        command = [self._substitute_tokens(part, tokens) for part in task_spec.command]
+        args = [self._substitute_tokens(part, tokens) for part in task_spec.args]
+        environment: Dict[str, str] = {}
+        for key, value in task_spec.environment.items():
+            environment[key] = self._substitute_tokens(value, tokens)
+        file_contents = [
+            self._substitute_tokens(file_spec.contents, tokens)
+            for file_spec in task_spec.files]
+        self._check_unresolved_tokens(
+            node.name, command + args + list(environment.values()) + file_contents)
+
+        service: Dict = {'image': task_spec.image}
+        if command:
+            service['entrypoint'] = [escape(command[0])]
+        trailing = command[1:] + args
+        if trailing:
+            service['command'] = [escape(part) for part in trailing]
+
+        if environment:
+            service['environment'] = {
+                key: escape(value) for key, value in environment.items()}
+
+        # Host-side layout: <work_dir>/<task>/output and <work_dir>/<task>/files.
+        task_dir = os.path.abspath(os.path.join(self._work_dir, node.name))
+        output_dir = os.path.join(task_dir, 'output')
+        mounts: List[str] = [f'{output_dir}:{CONTAINER_DATA_PATH}/output']
+
+        # Only task-to-task inputs are mounted; the mount index is the input's
+        # position in the full ``inputs`` list, so other input kinds leave gaps.
+        for position, source in enumerate(task_spec.inputs):
+            if not isinstance(source, task_module.TaskInputOutput):
+                continue
+            upstream_dir = os.path.abspath(
+                os.path.join(self._work_dir, source.task, 'output'))
+            mounts.append(f'{upstream_dir}:{CONTAINER_DATA_PATH}/input/{position}:ro')
+
+        files_root = os.path.join(task_dir, 'files')
+        for file_spec in task_spec.files:
+            relative = file_spec.path.lstrip('/')
+            host_file = os.path.realpath(os.path.join(files_root, relative))
+            mounts.append(f'{host_file}:{file_spec.path}:ro')
+
+        if mounts:
+            service['volumes'] = mounts
+
+        service['networks'] = [node.group]
+
+        gpu_count = self._task_gpu_count(task_spec, spec)
+        if gpu_count > 0:
+            gpu_device = {
+                'driver': 'nvidia',
+                'count': gpu_count,
+                'capabilities': ['gpu'],
+            }
+            service['deploy'] = {
+                'resources': {'reservations': {'devices': [gpu_device]}}}
+            service['shm_size'] = self._shm_size or self.DEFAULT_SHM_SIZE
+        elif self._shm_size:
+            service['shm_size'] = self._shm_size
+
+        return service
+
+    # ------------------------------------------------------------------
+    # Wave scheduling
+    # ------------------------------------------------------------------
+
+    def _find_ready_wave(self) -> List[str]:
+        """
+        Pick the tasks that should be started together in the next wave.
+
+        A group contributes its unfinished members only when all of them are
+        ready, so multi-task groups start as a unit.  When no group qualifies
+        even though some tasks are ready, every ready task is returned instead
+        so that scheduling cannot stall.  An empty list means nothing is ready.
+        """
+        ready_tasks = self._find_ready_tasks()
+        if not ready_tasks:
+            return []
+
+        ready_set = set(ready_tasks)
+        finished = set(self._results.keys())
+
+        # Group names in first-seen order, mapped to their ready members.
+        ready_by_group: Dict[str, List[str]] = {}
+        for task_name in ready_tasks:
+            owning_group = self._task_nodes[task_name].group
+            ready_by_group.setdefault(owning_group, []).append(task_name)
+
+        wave: List[str] = []
+        for group_name, ready_members in ready_by_group.items():
+            members = {task.name for task in self._group_specs[group_name].tasks}
+            pending = members - finished
+            if pending <= ready_set:
+                wave += sorted(pending)
+            elif len(members) == 1:
+                wave += ready_members
+
+        # Nothing could be co-scheduled: fall back to plain task-level readiness.
+        if not wave:
+            wave = ready_tasks
+
+        return wave
+
+    # ------------------------------------------------------------------
+    # Wave execution
+    # ------------------------------------------------------------------
+
+    def _run_wave(self, task_names: List[str],
+                  spec: workflow_module.WorkflowSpec) -> Dict[str, int]:
+        """
+        Bring *task_names* up in one ``compose up`` call and collect exit codes.
+
+        Every task is reported as 127 when the Compose executable is missing.
+        """
+        wave = list(task_names)
+        compose = self._compose_base_cmd(spec)
+        up_cmd = compose + ['up', '--no-deps', '--no-log-prefix'] + wave
+        logger.debug('Compose command: %s', ' '.join(up_cmd))
+
+        try:
+            subprocess.run(up_cmd, check=False)
+        except FileNotFoundError:
+            logger.error(
+                'Docker Compose not found. Is "%s" available in your PATH?',
+                self._compose_cmd)
+            return {name: 127 for name in wave}
+
+        exit_codes: Dict[str, int] = {
+            name: self._get_service_exit_code(name, spec) for name in wave}
+        subprocess.run(compose + ['rm', '-f'] + wave, capture_output=True, check=False)
+        return exit_codes
+
+    def _get_service_exit_code(self, service_name: str,
+                               spec: workflow_module.WorkflowSpec) -> int:
+        """
+        Return the exit code Docker Compose reports for *service_name*.
+
+        ``ps --format json`` output may be one JSON array or one object per
+        line; 1 is returned whenever no exit code can be determined.
+        """
+        ps_cmd = self._compose_base_cmd(spec) + [
+            'ps', '-a', '--format', 'json', service_name,
+        ]
+        try:
+            result = subprocess.run(
+                ps_cmd, capture_output=True, text=True, timeout=30, check=False)
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            logger.warning('Could not determine exit code for "%s"', service_name)
+            return 1
+        if result.returncode != 0:
+            logger.warning('Failed to query exit code for "%s": %s',
+                           service_name, result.stderr.strip())
+            return 1
+
+        for line in result.stdout.splitlines():
+            try:
+                parsed = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            # Skip anything that is not a JSON object instead of crashing on .get().
+            for entry in parsed if isinstance(parsed, list) else [parsed]:
+                if isinstance(entry, dict) and entry.get('Service') == service_name:
+                    return entry.get('ExitCode', 1)
+
+        logger.warning('No container info found for service "%s"', service_name)
+        return 1
+
+    # ------------------------------------------------------------------
+    # Cleanup
+    # ------------------------------------------------------------------
+
+    def _compose_cleanup(self, spec: workflow_module.WorkflowSpec):
+        """Run ``compose down --remove-orphans``; a timeout or missing binary is only logged."""
+        teardown_cmd = self._compose_base_cmd(spec) + ['down', '--remove-orphans']
+        try:
+            subprocess.run(teardown_cmd, check=False, timeout=60, capture_output=True)
+        except (FileNotFoundError, subprocess.TimeoutExpired):
+            logger.warning('Failed to clean up Docker Compose resources')
+
+
+def run_workflow_compose(spec_path: str, work_dir: str | None = None,
+                         keep_work_dir: bool = False,
+                         compose_cmd: str = 'docker compose',
+                         shm_size: str | None = None) -> bool:
+    """
+    Read the workflow spec at *spec_path* and run it through ``ComposeExecutor``.
+
+    Raises ValueError on Jinja markers; returns whether execution succeeded.
+    """
+    with open(spec_path, encoding='utf-8') as spec_file:
+        spec_text = spec_file.read()
+
+    if any(marker in spec_text for marker in ('{%', '{#', 'default-values')):
+        raise ValueError(
+            'This spec uses Jinja templates which require server-side expansion.\n'
+            'Run "osmo workflow submit --dry-run -f <spec>" first to get the '
+            'expanded spec,\nthen save that output and run it with docker-compose.')
+
+    owns_work_dir = work_dir is None
+    if work_dir is None:
+        work_dir = tempfile.mkdtemp(prefix='osmo-compose-')
+        logger.info('Using temporary work directory: %s', work_dir)
+
+    executor = ComposeExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                                compose_cmd=compose_cmd, shm_size=shm_size)
+    success = executor.execute(executor.load_spec(spec_text))
+    if not success:
+        logger.info('Work directory preserved for debugging: %s', work_dir)
+    elif owns_work_dir and not keep_work_dir:
+        logger.info('Cleaning up work directory: %s', work_dir)
+        shutil.rmtree(work_dir, ignore_errors=True)
+    return success
diff --git a/src/utils/tests/BUILD b/src/utils/tests/BUILD
index b555f85bc..f6b8d375e 100644
--- a/src/utils/tests/BUILD
+++ b/src/utils/tests/BUILD
@@ -66,3 +66,13 @@ py_test(
     ],
     local = True,
 )
+
+py_test(
+    name = "test_compose_executor",
+    srcs = ["test_compose_executor.py"],
+    deps = [
+        "//src/utils:compose_executor",  # ComposeExecutor, run_workflow_compose, COMPOSE_FILE_NAME
+        "//src/utils:standalone_executor",  # CONTAINER_DATA_PATH, TaskResult
+    ],
+    local = True,  # NOTE(review): presumably so Docker-backed tests can reach the host daemon; confirm
+)
diff --git a/src/utils/tests/test_compose_executor.py b/src/utils/tests/test_compose_executor.py
new file mode 100644
index 000000000..e824d2b9a
--- /dev/null
+++ b/src/utils/tests/test_compose_executor.py
@@ -0,0 +1,1073 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  # pylint: disable=line-too-long
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import tempfile
+import textwrap
+import unittest
+from unittest import mock
+
+import yaml
+
+from src.utils.compose_executor import (
+    COMPOSE_FILE_NAME,
+    ComposeExecutor,
+    run_workflow_compose,
+)
+from src.utils.standalone_executor import CONTAINER_DATA_PATH, TaskResult
+
+
+def _docker_compose_available() -> bool:
+    """Probe ``docker compose version``; True only when it exits with status 0.
+
+    A missing binary or a probe exceeding 10 seconds counts as unavailable.
+    """
+    probe_cmd = ['docker', 'compose', 'version']
+    try:
+        probe = subprocess.run(probe_cmd, capture_output=True, timeout=10, check=False)
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        return False
+    return probe.returncode == 0
+
+
+DOCKER_COMPOSE_AVAILABLE = _docker_compose_available()
+SKIP_COMPOSE_MSG = 'Docker Compose is not available on this machine'
+
+
+# ============================================================================
+# Unit tests — no Docker required
+# ============================================================================
+
+
+class TestComposeFileGeneration(unittest.TestCase):
+    """Check the docker-compose.yml that ComposeExecutor renders for a workflow spec."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-test-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def _make_executor(self) -> ComposeExecutor:
+        return ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+
+    def _generate_and_load(self, spec_text: str) -> dict:
+        """Run every step up to compose-file generation and return the parsed YAML."""
+        compose_executor = self._make_executor()
+        spec = compose_executor.load_spec(spec_text)
+        compose_executor._build_dag(spec)
+        compose_executor._validate_for_compose(spec)
+        compose_executor._setup_directories()
+        compose_executor._write_inline_files(spec)
+        compose_executor._generate_compose_file(spec)
+        compose_file = os.path.join(self.work_dir, COMPOSE_FILE_NAME)
+        with open(compose_file, encoding='utf-8') as handle:
+            return yaml.safe_load(handle)
+
+    def test_single_task_generates_one_service(self):
+        """One task in the spec yields exactly one compose service."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: hello
+              tasks:
+              - name: greet
+                image: alpine:3.18
+                command: ["echo", "hello"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        services = rendered['services']
+        self.assertIn('greet', services)
+        self.assertEqual(len(services), 1)
+        self.assertEqual(services['greet']['image'], 'alpine:3.18')
+        self.assertEqual(services['greet']['entrypoint'], ['echo'])
+        self.assertEqual(services['greet']['command'], ['hello'])
+
+    def test_parallel_tasks_generate_separate_services(self):
+        """Two unrelated tasks become two services, neither with depends_on."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: parallel
+              tasks:
+              - name: task-a
+                image: alpine:3.18
+                command: ["echo", "a"]
+              - name: task-b
+                image: alpine:3.18
+                command: ["echo", "b"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        services = rendered['services']
+        self.assertEqual(len(services), 2)
+        self.assertTrue({'task-a', 'task-b'} <= set(services))
+        for service in services.values():
+            self.assertNotIn('depends_on', service)
+
+    def test_volumes_for_output(self):
+        """The task's host output directory is mounted at the container output path."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: vol-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        host_output = os.path.abspath(os.path.join(self.work_dir, 'task', 'output'))
+        expected_mount = f'{host_output}:{CONTAINER_DATA_PATH}/output'
+        self.assertIn(expected_mount, rendered['services']['task']['volumes'])
+
+    def test_upstream_input_volumes(self):
+        """The producer's output directory is mounted read-only as the consumer's input 0."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["echo"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: producer
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        producer_output = os.path.abspath(
+            os.path.join(self.work_dir, 'producer', 'output'))
+        expected_mount = f'{producer_output}:{CONTAINER_DATA_PATH}/input/0:ro'
+        consumer_volumes = rendered['services']['consumer']['volumes']
+        self.assertIn(expected_mount, consumer_volumes)
+
+    def test_environment_variables_included(self):
+        """Spec environment entries are copied into the service's environment."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: env-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["printenv"]
+                environment:
+                  FOO: bar
+                  BAZ: "42"
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        environment = rendered['services']['task']['environment']
+        self.assertEqual(environment['FOO'], 'bar')
+        self.assertEqual(environment['BAZ'], '42')
+
+    def test_inline_files_mounted(self):
+        """An inline file exists on the host and is bind-mounted read-only at its path."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: files-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: echo hello
+                  path: /tmp/run.sh
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        volumes = rendered['services']['task']['volumes']
+        script_mounts = [mount for mount in volumes if '/tmp/run.sh:ro' in mount]
+        self.assertEqual(len(script_mounts), 1)
+
+        host_file, _, _ = script_mounts[0].partition(':')
+        self.assertTrue(os.path.exists(host_file))
+        with open(host_file, encoding='utf-8') as script:
+            self.assertEqual(script.read(), 'echo hello')
+
+    def test_group_network_assigned(self):
+        """Every member of a group is attached to a network named after the group."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: grouped
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["echo"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        self.assertIn('workers', rendered.get('networks', {}))
+        for member in ('leader', 'follower'):
+            self.assertEqual(rendered['services'][member]['networks'], ['workers'])
+
+    def test_gpu_resources_in_compose(self):
+        """A GPU task gets an nvidia device reservation and the default shm_size."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: gpu-test
+              resources:
+                gpu-res:
+                  gpu: 2
+              tasks:
+              - name: train
+                image: pytorch:latest
+                resource: gpu-res
+                command: ["python", "train.py"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        service = rendered['services']['train']
+        devices = service['deploy']['resources']['reservations']['devices']
+        self.assertEqual(len(devices), 1)
+        gpu = devices[0]
+        self.assertEqual((gpu['driver'], gpu['count']), ('nvidia', 2))
+        self.assertIn('gpu', gpu['capabilities'])
+        self.assertEqual(service['shm_size'], '16g')
+
+    def test_custom_shm_size(self):
+        """An explicit shm_size on the executor replaces the GPU default."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: shm-test
+              resources:
+                gpu-res:
+                  gpu: 1
+              tasks:
+              - name: train
+                image: pytorch:latest
+                resource: gpu-res
+                command: ["python"]
+        ''')
+        compose_executor = ComposeExecutor(
+            work_dir=self.work_dir, keep_work_dir=True, shm_size='32g')
+        spec = compose_executor.load_spec(workflow_yaml)
+        compose_executor._build_dag(spec)
+        compose_executor._validate_for_compose(spec)
+        compose_executor._setup_directories()
+        compose_executor._generate_compose_file(spec)
+
+        compose_file = os.path.join(self.work_dir, COMPOSE_FILE_NAME)
+        with open(compose_file, encoding='utf-8') as handle:
+            rendered = yaml.safe_load(handle)
+        self.assertEqual(rendered['services']['train']['shm_size'], '32g')
+
+    def test_non_gpu_task_no_deploy_section(self):
+        """A task without GPU resources gets no deploy section."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: cpu-test
+              tasks:
+              - name: preprocess
+                image: alpine:3.18
+                command: ["echo"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+        self.assertNotIn('deploy', rendered['services']['preprocess'])
+
+    def test_entrypoint_and_command_split(self):
+        """command[0] becomes the entrypoint; the rest of command plus args becomes command."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: split-test
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["bash", "-c"]
+                args: ["echo hello"]
+        ''')
+        rendered = self._generate_and_load(workflow_yaml)
+
+        service = rendered['services']['task']
+        self.assertEqual(service['entrypoint'], ['bash'])
+        self.assertEqual(service['command'], ['-c', 'echo hello'])
+
+
+class TestComposeTokenMap(unittest.TestCase):
+    """Check the ``host:<task>`` and ``output`` entries of the compose token map."""
+
+    def test_host_tokens_for_group_members(self):
+        """The leader's token map has a host token for every member of its group."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: host-tokens
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                - name: worker-a
+                  image: alpine:3.18
+                  command: ["echo"]
+                - name: worker-b
+                  image: alpine:3.18
+                  command: ["echo"]
+        ''')
+        executor = ComposeExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(workflow_yaml)
+        executor._build_dag(spec)
+
+        leader = executor._task_nodes['leader']
+        token_map = executor._build_token_map(leader)
+
+        # Each host token is expected to resolve to the task name itself.
+        for member in ('leader', 'worker-a', 'worker-b'):
+            self.assertEqual(token_map[f'host:{member}'], member)
+        self.assertEqual(token_map['output'], f'{CONTAINER_DATA_PATH}/output')
+
+    def test_no_host_tokens_for_single_task_group(self):
+        """A lone task still has ``host:<its own name>`` (despite this test's name)."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: single
+              tasks:
+              - name: solo
+                image: alpine:3.18
+                command: ["echo"]
+        ''')
+        executor = ComposeExecutor(work_dir='/tmp/unused')
+        spec = executor.load_spec(workflow_yaml)
+        executor._build_dag(spec)
+
+        solo = executor._task_nodes['solo']
+        token_map = executor._build_token_map(solo)
+        self.assertIn('host:solo', token_map)
+
+
+class TestComposeValidation(unittest.TestCase):
+    """Check which specs ``_validate_for_compose`` accepts and which it rejects."""
+
+    def _make_executor(self) -> ComposeExecutor:
+        return ComposeExecutor(work_dir='/tmp/unused')
+
+    def test_host_tokens_accepted(self):
+        """A {{host:taskname}} token naming a task in the same group is accepted."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: host-ok
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                  args: ["--peer={{host:follower}}"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["echo"]
+        ''')
+        validator = self._make_executor()
+        spec = validator.load_spec(workflow_yaml)
+        validator._build_dag(spec)
+        validator._validate_for_compose(spec)
+
+    def test_host_token_cross_group_rejected(self):
+        """A {{host:taskname}} token naming a task in another group raises ValueError."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: cross-group
+              groups:
+              - name: group-a
+                tasks:
+                - name: task-a
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                  args: ["--peer={{host:task-b}}"]
+              - name: group-b
+                tasks:
+                - name: task-b
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+        ''')
+        validator = self._make_executor()
+        spec = validator.load_spec(workflow_yaml)
+        validator._build_dag(spec)
+        with self.assertRaises(ValueError) as raised:
+            validator._validate_for_compose(spec)
+        for fragment in ('host:task-b', 'outside its group'):
+            self.assertIn(fragment, str(raised.exception))
+
+    def test_dataset_input_rejected(self):
+        """A dataset input raises ValueError mentioning 'dataset'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                inputs:
+                - dataset:
+                    name: my_dataset
+        ''')
+        validator = self._make_executor()
+        spec = validator.load_spec(workflow_yaml)
+        validator._build_dag(spec)
+        with self.assertRaises(ValueError) as raised:
+            validator._validate_for_compose(spec)
+        self.assertIn('dataset', str(raised.exception))
+
+    def test_credentials_rejected(self):
+        """A task with credentials raises ValueError mentioning 'credentials'."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                credentials:
+                  my-secret: NGC_API_KEY
+        ''')
+        validator = self._make_executor()
+        spec = validator.load_spec(workflow_yaml)
+        validator._build_dag(spec)
+        with self.assertRaises(ValueError) as raised:
+            validator._validate_for_compose(spec)
+        self.assertIn('credentials', str(raised.exception))
+
+    def test_simple_spec_passes(self):
+        """A producer/consumer spec with only a task-to-task input validates cleanly."""
+        workflow_yaml = textwrap.dedent('''\
+            workflow:
+              name: ok
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["echo"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: producer
+        ''')
+        validator = self._make_executor()
+        spec = validator.load_spec(workflow_yaml)
+        validator._build_dag(spec)
+        validator._validate_for_compose(spec)
+
+
+class TestFindReadyWave(unittest.TestCase):
+    """Check which tasks ``_find_ready_wave`` returns as results accumulate."""
+
+    def _make_executor(self, spec_text: str) -> ComposeExecutor:
+        scheduler = ComposeExecutor(work_dir='/tmp/unused')
+        spec = scheduler.load_spec(spec_text)
+        scheduler._build_dag(spec)
+        return scheduler
+
+    def test_all_independent_tasks_in_one_wave(self):
+        """Tasks with no inputs are all returned in the first wave."""
+        executor = self._make_executor(textwrap.dedent('''\
+            workflow:
+              name: parallel
+              tasks:
+              - name: a
+                image: alpine:3.18
+                command: ["echo"]
+              - name: b
+                image: alpine:3.18
+                command: ["echo"]
+              - name: c
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        first_wave = executor._find_ready_wave()
+        self.assertEqual(set(first_wave), {'a', 'b', 'c'})
+
+    def test_serial_chain_one_per_wave(self):
+        """A two-task chain is released one task at a time."""
+        executor = self._make_executor(textwrap.dedent('''\
+            workflow:
+              name: serial
+              tasks:
+              - name: first
+                image: alpine:3.18
+                command: ["echo"]
+              - name: second
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: first
+        '''))
+
+        # Before anything has finished only the head of the chain is ready.
+        self.assertEqual(executor._find_ready_wave(), ['first'])
+
+        executor._results['first'] = TaskResult(
+            name='first', exit_code=0, output_dir='/tmp/out')
+        # Recording a result for 'first' makes 'second' ready.
+        self.assertEqual(executor._find_ready_wave(), ['second'])
+
+    def test_multi_task_group_co_scheduled(self):
+        """Both members of a two-task group are returned in the same wave."""
+        executor = self._make_executor(textwrap.dedent('''\
+            workflow:
+              name: grouped
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["echo"]
+        '''))
+        group_wave = executor._find_ready_wave()
+        self.assertEqual(set(group_wave), {'leader', 'follower'})
+
+    def test_diamond_dag_waves(self):
+        """A diamond DAG is released as root, then both branches, then the join."""
+        executor = self._make_executor(textwrap.dedent('''\
+            workflow:
+              name: diamond
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["echo"]
+              - name: left
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: right
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: root
+              - name: join
+                image: alpine:3.18
+                command: ["echo"]
+                inputs:
+                - task: left
+                - task: right
+        '''))
+
+        # Wave 1: only the root has no upstream dependency.
+        self.assertEqual(executor._find_ready_wave(), ['root'])
+
+        executor._results['root'] = TaskResult(
+            name='root', exit_code=0, output_dir='/tmp/out')
+        # Wave 2: both branches fan out together.
+        self.assertEqual(set(executor._find_ready_wave()), {'left', 'right'})
+
+        for branch in ('left', 'right'):
+            executor._results[branch] = TaskResult(
+                name=branch, exit_code=0, output_dir='/tmp/out')
+        # Wave 3: the join runs once both branches have results.
+        final_wave = executor._find_ready_wave()
+        self.assertEqual(final_wave, ['join'])
+
+    def test_empty_wave_when_all_done(self):
+        """Once the only task has a result, the next wave is empty."""
+        executor = self._make_executor(textwrap.dedent('''\
+            workflow:
+              name: done
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        executor._results['task'] = TaskResult(
+            name='task', exit_code=0, output_dir='/tmp/out')
+        remaining = executor._find_ready_wave()
+        self.assertEqual(remaining, [])
+
+
+class TestComposeProjectName(unittest.TestCase):
+    """Check how the Docker Compose project name is derived from the workflow name."""
+
+    def test_simple_name(self):
+        compose_executor = ComposeExecutor(work_dir='/tmp/unused')
+        spec = compose_executor.load_spec(textwrap.dedent('''\
+            workflow:
+              name: my-workflow
+              tasks:
+              - name: t
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        self.assertEqual(compose_executor._compose_project_name(spec), 'osmo-my-workflow')
+
+    def test_name_with_special_chars(self):  # NOTE(review): spec name has no special chars
+        compose_executor = ComposeExecutor(work_dir='/tmp/unused')
+        spec = compose_executor.load_spec(textwrap.dedent('''\
+            workflow:
+              name: my-workflow
+              tasks:
+              - name: t
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        project_name = compose_executor._compose_project_name(spec)
+        self.assertTrue(project_name.startswith('osmo-'))
+        self.assertRegex(project_name, r'^[a-z0-9-]+$')
+
+
+class TestJinjaTemplateDetection(unittest.TestCase):
+    """Verify that Jinja templates are rejected before execution."""
+
+    def _write_temp_spec(self, content: str) -> str:
+        f = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
+        f.write(content)
+        f.flush()
+        f.close()
+        return f.name
+
+    def test_jinja_block_detected(self):
+        path = self._write_temp_spec(textwrap.dedent('''\
+            workflow:
+              name: {% if true %}test{% endif %}
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+        '''))
+        try:
+            with self.assertRaises(ValueError) as context:
+                run_workflow_compose(path)
+            self.assertIn('Jinja', str(context.exception))
+        finally:
+            os.unlink(path)
+
+    def test_default_values_detected(self):
+        path = self._write_temp_spec(textwrap.dedent('''\
+            workflow:
+              name: "{{experiment}}"
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+            default-values:
+              experiment: test
+        '''))
+        try:
+            with self.assertRaises(ValueError) as context:
+                run_workflow_compose(path)
+            self.assertIn('Jinja', str(context.exception))
+        finally:
+            os.unlink(path)
+
+
+class TestUnresolvedTokenDetection(unittest.TestCase):
+    """Verify that unresolved tokens are caught during compose file generation."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-tokens-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_unresolved_jinja_variable_caught(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "{{missing_var}}"]
+        ''')
+        executor = ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('missing_var', str(context.exception))
+
+
+class TestPathTraversal(unittest.TestCase):
+    """Verify that file path traversal is prevented."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-traversal-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_path_traversal_rejected(self):
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: bad
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo"]
+                files:
+                - contents: "malicious"
+                  path: /../../etc/evil.conf
+        ''')
+        executor = ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        with self.assertRaises(ValueError) as context:
+            executor.execute(spec)
+        self.assertIn('escapes the task directory', str(context.exception))
+
+
+class TestRunWorkflowComposeErrors(unittest.TestCase):
+    """Test error handling in run_workflow_compose()."""
+
+    def test_nonexistent_file_raises(self):
+        with self.assertRaises(FileNotFoundError):
+            run_workflow_compose(spec_path='/nonexistent/path/spec.yaml')
+
+
+# ============================================================================
+# Integration tests — require Docker Compose
+# ============================================================================
+
+
+@unittest.skipUnless(DOCKER_COMPOSE_AVAILABLE, SKIP_COMPOSE_MSG)
+class TestComposeExecution(unittest.TestCase):
+    """Integration tests that run workflows through Docker Compose."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-test-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def _execute_spec(self, spec_text: str) -> bool:
+        executor = ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        return executor.execute(spec)
+
+    def test_hello_world(self):
+        """Run a minimal single-task workflow."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: hello-compose
+              tasks:
+              - name: hello
+                image: alpine:3.18
+                command: ["echo", "Hello from Docker Compose!"]
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+
+    def test_parallel_independent_tasks(self):
+        """Independent tasks all execute and produce their outputs."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: parallel-compose
+              tasks:
+              - name: task-a
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'a' > {{output}}/marker.txt"]
+              - name: task-b
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'b' > {{output}}/marker.txt"]
+              - name: task-c
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'c' > {{output}}/marker.txt"]
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+        for task_name, expected in [('task-a', 'a'), ('task-b', 'b'), ('task-c', 'c')]:
+            marker = os.path.join(self.work_dir, task_name, 'output', 'marker.txt')
+            with open(marker) as f:
+                self.assertEqual(f.read().strip(), expected)
+
+    def test_serial_data_flow(self):
+        """Data written by a producer is readable by a consumer."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: serial-compose
+              tasks:
+              - name: producer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'from_producer' > {{output}}/data.txt"]
+              - name: consumer
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/data.txt > {{output}}/received.txt"]
+                inputs:
+                - task: producer
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+        received = os.path.join(self.work_dir, 'consumer', 'output', 'received.txt')
+        with open(received) as f:
+            self.assertEqual(f.read().strip(), 'from_producer')
+
+    def test_diamond_dag(self):
+        """A diamond DAG executes with correct data flow."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: diamond-compose
+              tasks:
+              - name: root
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'root_data' > {{output}}/base.txt"]
+              - name: left
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'left:' > {{output}}/result.txt && cat {{input:0}}/base.txt >> {{output}}/result.txt"]
+                inputs:
+                - task: root
+              - name: right
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["echo 'right:' > {{output}}/result.txt && cat {{input:0}}/base.txt >> {{output}}/result.txt"]
+                inputs:
+                - task: root
+              - name: join
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["cat {{input:0}}/result.txt > {{output}}/final.txt && cat {{input:1}}/result.txt >> {{output}}/final.txt"]
+                inputs:
+                - task: left
+                - task: right
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+        final = os.path.join(self.work_dir, 'join', 'output', 'final.txt')
+        with open(final) as f:
+            content = f.read()
+        self.assertIn('left:', content)
+        self.assertIn('right:', content)
+        self.assertIn('root_data', content)
+
+    def test_failure_cancels_downstream(self):
+        """A failed task prevents downstream dependents from running."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: fail-compose
+              tasks:
+              - name: failing
+                image: alpine:3.18
+                command: ["sh", "-c", "exit 1"]
+              - name: should-not-run
+                image: alpine:3.18
+                command: ["sh", "-c", "echo oops > {{output}}/bad.txt"]
+                inputs:
+                - task: failing
+        ''')
+        self.assertFalse(self._execute_spec(spec_text))
+        output_file = os.path.join(
+            self.work_dir, 'should-not-run', 'output', 'bad.txt')
+        self.assertFalse(os.path.exists(output_file))
+
+    def test_environment_variables(self):
+        """Environment variables are passed to compose containers."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: env-compose
+              tasks:
+              - name: check-env
+                image: alpine:3.18
+                command: ["sh", "-c"]
+                args: ["test \\"$MY_VAR\\" = \\"hello\\" && echo ok > {{output}}/result.txt"]
+                environment:
+                  MY_VAR: hello
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+
+    def test_inline_file_mounted(self):
+        """An inline file is written and mounted into the container."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: files-compose
+              tasks:
+              - name: check-file
+                image: alpine:3.18
+                command: ["sh", "/tmp/run.sh"]
+                files:
+                - contents: |
+                    echo "script ran" > {{output}}/result.txt
+                  path: /tmp/run.sh
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+        result = os.path.join(self.work_dir, 'check-file', 'output', 'result.txt')
+        with open(result) as f:
+            self.assertIn('script ran', f.read())
+
+    def test_compose_file_preserved(self):
+        """The generated docker-compose.yml is kept in the work directory."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: preserve-compose
+              tasks:
+              - name: task
+                image: alpine:3.18
+                command: ["echo", "ok"]
+        ''')
+        self._execute_spec(spec_text)
+        compose_path = os.path.join(self.work_dir, COMPOSE_FILE_NAME)
+        self.assertTrue(os.path.exists(compose_path))
+
+    def test_groups_with_data_flow(self):
+        """Groups with inter-group data dependencies execute correctly."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: group-flow-compose
+              groups:
+              - name: prepare
+                tasks:
+                - name: generate
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c"]
+                  args:
+                  - |
+                    mkdir -p {{output}}/data
+                    for i in 1 2 3; do echo "sample_$i" >> {{output}}/data/dataset.csv; done
+              - name: train
+                tasks:
+                - name: trainer
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c"]
+                  args:
+                  - |
+                    wc -l {{input:0}}/data/dataset.csv > {{output}}/count.txt
+                  inputs:
+                  - task: generate
+        ''')
+        self.assertTrue(self._execute_spec(spec_text))
+        count_file = os.path.join(self.work_dir, 'trainer', 'output', 'count.txt')
+        with open(count_file) as f:
+            self.assertIn('3', f.read())
+
+
+@unittest.skipUnless(DOCKER_COMPOSE_AVAILABLE, SKIP_COMPOSE_MSG)
+class TestComposeLeadTaskPolicy(unittest.TestCase):
+    """Verify ignoreNonleadStatus behavior in compose mode."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-lead-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_nonlead_failure_ignored_when_flag_true(self):
+        """With ignoreNonleadStatus=true, a non-lead failure does not abort the workflow."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: lead-policy-compose
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+        ''')
+        executor = ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertTrue(executor.execute(spec))
+
+    def test_lead_failure_aborts_workflow(self):
+        """A lead task failure aborts the workflow even with ignoreNonleadStatus=true."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: lead-fail-compose
+              groups:
+              - name: workers
+                tasks:
+                - name: leader
+                  lead: true
+                  image: alpine:3.18
+                  command: ["sh", "-c", "exit 1"]
+                - name: follower
+                  image: alpine:3.18
+                  command: ["echo", "ok"]
+        ''')
+        executor = ComposeExecutor(work_dir=self.work_dir, keep_work_dir=True)
+        spec = executor.load_spec(spec_text)
+        self.assertFalse(executor.execute(spec))
+
+
+@unittest.skipUnless(DOCKER_COMPOSE_AVAILABLE, SKIP_COMPOSE_MSG)
+class TestRunWorkflowCompose(unittest.TestCase):
+    """Test the top-level run_workflow_compose() function."""
+
+    def setUp(self):
+        self.work_dir = tempfile.mkdtemp(prefix='osmo-compose-func-')
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_success_with_work_dir(self):
+        """A successful run preserves the caller-supplied work directory."""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(textwrap.dedent('''\
+                workflow:
+                  name: func-test
+                  tasks:
+                  - name: task
+                    image: alpine:3.18
+                    command: ["echo", "ok"]
+            '''))
+            spec_path = f.name
+        try:
+            result = run_workflow_compose(
+                spec_path=spec_path,
+                work_dir=self.work_dir,
+                keep_work_dir=True,
+            )
+            self.assertTrue(result)
+            self.assertTrue(os.path.exists(self.work_dir))
+        finally:
+            os.unlink(spec_path)
+
+    def test_failure_preserves_work_dir(self):
+        """On failure, the work directory is preserved."""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(textwrap.dedent('''\
+                workflow:
+                  name: fail-func
+                  tasks:
+                  - name: task
+                    image: alpine:3.18
+                    command: ["sh", "-c", "exit 1"]
+            '''))
+            spec_path = f.name
+        try:
+            result = run_workflow_compose(
+                spec_path=spec_path,
+                work_dir=self.work_dir,
+                keep_work_dir=False,
+            )
+            self.assertFalse(result)
+            self.assertTrue(os.path.exists(self.work_dir))
+        finally:
+            os.unlink(spec_path)
+
+
+if __name__ == '__main__':
+    unittest.main()

From fd1a794c3be8897abc1e147fe5a8b62f013bd4fe Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 11:51:28 -0700
Subject: [PATCH 23/35] Enhance Docker Compose and Standalone Executors with
 error handling and cleanup

- Added error handling in `ComposeExecutor` to raise `NotImplementedError` for unsupported resume functionality.
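- Refactored `run_workflow_compose` and `run_workflow_standalone` to clean up in a `try`/`finally` block: an auto-created temporary work directory is now removed even when execution fails or raises, unless `keep_work_dir` is set; a caller-supplied work directory is still preserved.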
- Added validation in `StandaloneExecutor.load_spec` that the parsed workflow specification is a YAML mapping, raising a `ValueError` that names the actual type otherwise.
- Updated build configurations to include the new `docker_compose.py` file and the `compose_executor` library.
---
 src/cli/BUILD                    |  2 ++
 src/utils/compose_executor.py    | 25 +++++++++++++++----------
 src/utils/standalone_executor.py | 27 ++++++++++++++++-----------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/src/cli/BUILD b/src/cli/BUILD
index eade2ea71..9789a9923 100755
--- a/src/cli/BUILD
+++ b/src/cli/BUILD
@@ -37,6 +37,7 @@ osmo_py_library(
         "dataset.py",
         "editor.py",
         "formatters.py",
+        "docker_compose.py",
         "standalone.py",
         "login.py",
         "main_parser.py",
@@ -74,6 +75,7 @@ osmo_py_library(
         "//src/lib/utils:validation",
         "//src/lib/utils:version",
         "//src/lib/utils:workflow",
+        "//src/utils:compose_executor",
         "//src/utils:standalone_executor",
     ],
 )
diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index 928ddcada..f1ff8d519 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -86,6 +86,10 @@ def _compose_base_cmd(self, spec: workflow_module.WorkflowSpec) -> List[str]:
     def execute(self, spec: workflow_module.WorkflowSpec,
                 resume: bool = False, from_step: str | None = None) -> bool:
         """Run all tasks in wave-parallel order via Docker Compose."""
+        if resume or from_step:
+            raise NotImplementedError(
+                'docker-compose mode does not support --resume or --from-step yet. '
+                'Use standalone mode for resume functionality.')
         self._results.clear()
         self._build_dag(spec)
         self._validate_for_compose(spec)
@@ -513,15 +517,16 @@ def run_workflow_compose(spec_path: str, work_dir: str | None = None,
         work_dir = tempfile.mkdtemp(prefix='osmo-compose-')
         logger.info('Using temporary work directory: %s', work_dir)
 
-    executor = ComposeExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                                compose_cmd=compose_cmd, shm_size=shm_size)
-    spec = executor.load_spec(spec_text)
-    success = executor.execute(spec)
-
-    if created_work_dir and not keep_work_dir and success:
-        logger.info('Cleaning up work directory: %s', work_dir)
-        shutil.rmtree(work_dir, ignore_errors=True)
-    elif not success:
-        logger.info('Work directory preserved for debugging: %s', work_dir)
+    success = False
+    try:
+        executor = ComposeExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                                    compose_cmd=compose_cmd, shm_size=shm_size)
+        spec = executor.load_spec(spec_text)
+        success = executor.execute(spec)
+    finally:
+        if created_work_dir and not keep_work_dir:
+            shutil.rmtree(work_dir, ignore_errors=True)
+        elif not success:
+            logger.info('Work directory preserved for debugging: %s', work_dir)
 
     return success
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 8f4493140..588572d3f 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -118,6 +118,10 @@ def _detect_available_gpus(self) -> int:
     def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
         """Parse raw YAML text into a validated WorkflowSpec via the versioned spec model."""
         raw = yaml.safe_load(spec_text)
+        if not isinstance(raw, dict):
+            raise ValueError(
+                f'Expected a YAML mapping for the workflow spec, '
+                f'got {type(raw).__name__}')
         versioned = workflow_module.VersionedWorkflowSpec(**raw)
         return versioned.workflow
 
@@ -569,16 +573,17 @@ def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
         work_dir = tempfile.mkdtemp(prefix='osmo-standalone-')
         logger.info('Using temporary work directory: %s', work_dir)
 
-    executor = StandaloneExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                                   docker_cmd=docker_cmd, shm_size=shm_size)
-    spec = executor.load_spec(spec_text)
-    success = executor.execute(spec, resume=resume or from_step is not None,
-                               from_step=from_step)
-
-    if created_work_dir and not keep_work_dir and success:
-        logger.info('Cleaning up work directory: %s', work_dir)
-        shutil.rmtree(work_dir, ignore_errors=True)
-    elif not success:
-        logger.info('Work directory preserved for debugging: %s', work_dir)
+    success = False
+    try:
+        executor = StandaloneExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
+                                       docker_cmd=docker_cmd, shm_size=shm_size)
+        spec = executor.load_spec(spec_text)
+        success = executor.execute(spec, resume=resume or from_step is not None,
+                                   from_step=from_step)
+    finally:
+        if created_work_dir and not keep_work_dir:
+            shutil.rmtree(work_dir, ignore_errors=True)
+        elif not success:
+            logger.info('Work directory preserved for debugging: %s', work_dir)
 
     return success

From 47ff70fcb5dead2f4ba133731462dd33ab46bcbe Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 12:03:08 -0700
Subject: [PATCH 24/35] Add cleanup functionality in StandaloneExecutor for
 rerun tasks

- Introduced a new method `_clean_rerun_output_dirs` to remove output directories for tasks that will be re-executed, ensuring no stale artifacts remain.
- Updated the `execute` method to call the cleanup function when resuming or starting from a specific step.
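- Adjusted GPU device specification formatting in Docker arguments: the `--gpus` value is now wrapped in literal double quotes (`"device=0,1"`) so the comma-separated device list is passed as a single value.
- `_task_gpu_count` now returns the task-level `resources.gpu` value when it is set, before falling back to the workflow-level resource spec.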
---
 src/utils/standalone_executor.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 588572d3f..a33dd9b84 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -135,6 +135,7 @@ def execute(self, spec: workflow_module.WorkflowSpec,
 
         if resume or from_step:
             self._restore_completed_tasks(from_step)
+            self._clean_rerun_output_dirs()
 
         total_tasks = sum(len(g.tasks) for g in self._groups(spec))
         skipped = len(self._results)
@@ -250,6 +251,16 @@ def _get_downstream_tasks(self, task_name: str) -> Set[str]:
                     queue.append(downstream)
         return visited
 
+    def _clean_rerun_output_dirs(self):
+        """Remove output directories for tasks that will be re-executed so no stale artifacts remain."""
+        tasks_to_rerun = set(self._task_nodes.keys()) - set(self._results.keys())
+        for task_name in tasks_to_rerun:
+            output_dir = os.path.join(self._work_dir, task_name, 'output')
+            if os.path.isdir(output_dir):
+                shutil.rmtree(output_dir)
+                os.makedirs(output_dir, exist_ok=True)
+                logger.debug('Cleaned output directory for task "%s"', task_name)
+
     def _groups(self, spec: workflow_module.WorkflowSpec) -> List[task_module.TaskGroupSpec]:
         """Return the spec's groups, or synthesize one group per task when groups are absent."""
         if spec.groups:
@@ -417,6 +428,8 @@ def _cancel_downstream(self, failed_task: str):
     def _task_gpu_count(self, task_spec: task_module.TaskSpec,
                         spec: workflow_module.WorkflowSpec) -> int:
         """Return the number of GPUs requested by a task's resource spec, defaulting to 0."""
+        if task_spec.resources.gpu:
+            return task_spec.resources.gpu
         resource_spec = spec.resources.get(task_spec.resource)
         if resource_spec and resource_spec.gpu:
             return resource_spec.gpu
@@ -463,9 +476,9 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 logger.warning(
                     'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
                     node.name, gpu_count, available, available)
-                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(available))}']
+                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(available))}"']
             else:
-                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(gpu_count))}']
+                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
             docker_args += ['--shm-size', self._shm_size or self.DEFAULT_SHM_SIZE]

From dbf479056a21230ae13951cebb5acdba6555fd61 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 16:09:57 -0700
Subject: [PATCH 25/35] Enhance CLI functionality with credential mapping and
 variable overrides

- Added support for `--credential`, `--set`, and `--set-string` arguments in both `docker_compose.py` and `standalone.py` to allow users to map credential names to local directories and override default values in workflow specifications.
- Implemented `_parse_credentials` function to validate and parse credential inputs.
- Updated `ComposeExecutor` and `StandaloneExecutor` to handle new credential and variable parameters, ensuring proper binding of directories and values during execution.
- Enhanced error handling for unsupported credential configurations in workflow specifications.
---
 .gitignore                       |   4 +-
 src/cli/docker_compose.py        |  42 +++++++++++
 src/cli/standalone.py            |  42 +++++++++++
 src/utils/BUILD                  |   2 +
 src/utils/compose_executor.py    |  50 +++++++++-----
 src/utils/standalone_executor.py | 115 ++++++++++++++++++++++++++-----
 6 files changed, 218 insertions(+), 37 deletions(-)

diff --git a/.gitignore b/.gitignore
index b84388d41..f5180c6e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,4 +30,6 @@ docs/**/domain_config.js
 
 .lycheecache
 
-.venv/
\ No newline at end of file
+.venv/
+build/
+*.egg-info
\ No newline at end of file
diff --git a/src/cli/docker_compose.py b/src/cli/docker_compose.py
index ab44e4f62..cde0c173f 100644
--- a/src/cli/docker_compose.py
+++ b/src/cli/docker_compose.py
@@ -17,6 +17,7 @@
 """
 
 import argparse
+import os
 import sys
 
 import shtab
@@ -58,6 +59,28 @@ def setup_parser(parser: argparse._SubParsersAction):
         default='docker compose',
         help='Docker Compose command to use (e.g. "docker-compose" for V1). '
              'Default: "docker compose".')
+    run_parser.add_argument(
+        '--credential',
+        nargs='+',
+        default=[],
+        help='Map credential names to local directories. '
+             'Format: "<name>=<path>". The directory is bind-mounted read-only '
+             'into the container at the path declared in the spec. '
+             'Example: --credential hf-token=$HOME/.hf')
+    run_parser.add_argument(
+        '--set',
+        nargs='+',
+        default=[],
+        help='Override default-values in the workflow spec. '
+             'Format: "<field>=<value>". Values are cast as int or float if '
+             'applicable, otherwise kept as strings.')
+    run_parser.add_argument(
+        '--set-string',
+        dest='set_string',
+        nargs='+',
+        default=[],
+        help='Override default-values in the workflow spec, forcing string type. '
+             'Format: "<field>=<value>".')
     run_parser.add_argument(
         '--shm-size',
         dest='shm_size',
@@ -67,15 +90,34 @@ def setup_parser(parser: argparse._SubParsersAction):
     run_parser.set_defaults(func=_run_compose)
 
 
+def _parse_credentials(raw_credentials: list[str]) -> dict[str, str]:
+    """Parse --credential name=path arguments into a dict."""
+    result: dict[str, str] = {}
+    for entry in raw_credentials:
+        if '=' not in entry:
+            raise ValueError(
+                f'--credential value "{entry}" is incorrectly formatted (expected name=/path)')
+        name, path = entry.split('=', 1)
+        if not os.path.isdir(path):
+            raise ValueError(
+                f'Credential path for "{name}" does not exist or is not a directory: {path}')
+        result[name] = path
+    return result
+
+
 def _run_compose(service_client, args: argparse.Namespace):
     """Execute a workflow via Docker Compose using the parsed CLI arguments."""
     try:
+        credentials = _parse_credentials(args.credential)
         success = compose_executor.run_workflow_compose(
             spec_path=args.workflow_file,
             work_dir=args.work_dir,
             keep_work_dir=args.keep,
             compose_cmd=args.compose_cmd,
             shm_size=args.shm_size,
+            set_variables=args.set,
+            set_string_variables=args.set_string,
+            credentials=credentials,
         )
     except (ValueError, FileNotFoundError, PermissionError) as error:
         print(f'Error: {error}', file=sys.stderr)
diff --git a/src/cli/standalone.py b/src/cli/standalone.py
index d30e764ff..8c3c200f7 100644
--- a/src/cli/standalone.py
+++ b/src/cli/standalone.py
@@ -17,6 +17,7 @@
 """
 
 import argparse
+import os
 import sys
 
 import shtab
@@ -68,6 +69,28 @@ def setup_parser(parser: argparse._SubParsersAction):
         help='Resume from a specific task, re-running it and all downstream tasks. '
              'Tasks upstream of the specified step are skipped if they completed '
              'successfully. Requires --work-dir pointing to the previous run directory.')
+    run_parser.add_argument(
+        '--credential',
+        nargs='+',
+        default=[],
+        help='Map credential names to local directories. '
+             'Format: "<name>=<path>". The directory is bind-mounted read-only '
+             'into the container at the path declared in the spec. '
+             'Example: --credential hf-token=$HOME/.hf')
+    run_parser.add_argument(
+        '--set',
+        nargs='+',
+        default=[],
+        help='Override default-values in the workflow spec. '
+             'Format: "<field>=<value>". Values are cast as int or float if '
+             'applicable, otherwise kept as strings.')
+    run_parser.add_argument(
+        '--set-string',
+        dest='set_string',
+        nargs='+',
+        default=[],
+        help='Override default-values in the workflow spec, forcing string type. '
+             'Format: "<field>=<value>".')
     run_parser.add_argument(
         '--shm-size',
         dest='shm_size',
@@ -78,9 +101,25 @@ def setup_parser(parser: argparse._SubParsersAction):
     run_parser.set_defaults(func=_run_standalone)
 
 
+def _parse_credentials(raw_credentials: list[str]) -> dict[str, str]:
+    """Parse --credential name=path arguments into a dict."""
+    result: dict[str, str] = {}
+    for entry in raw_credentials:
+        if '=' not in entry:
+            raise ValueError(
+                f'--credential value "{entry}" is incorrectly formatted (expected name=/path)')
+        name, path = entry.split('=', 1)
+        if not os.path.isdir(path):
+            raise ValueError(
+                f'Credential path for "{name}" does not exist or is not a directory: {path}')
+        result[name] = path
+    return result
+
+
 def _run_standalone(service_client, args: argparse.Namespace):
     """Execute a workflow in standalone mode via Docker using the parsed CLI arguments."""
     try:
+        credentials = _parse_credentials(args.credential)
         success = standalone_executor.run_workflow_standalone(
             spec_path=args.workflow_file,
             work_dir=args.work_dir,
@@ -89,6 +128,9 @@ def _run_standalone(service_client, args: argparse.Namespace):
             from_step=args.from_step,
             docker_cmd=args.docker_cmd,
             shm_size=args.shm_size,
+            set_variables=args.set,
+            set_string_variables=args.set_string,
+            credentials=credentials,
         )
     except (ValueError, FileNotFoundError, PermissionError) as error:
         print(f'Error: {error}', file=sys.stderr)
diff --git a/src/utils/BUILD b/src/utils/BUILD
index 3e5eaae19..a6674a2f3 100644
--- a/src/utils/BUILD
+++ b/src/utils/BUILD
@@ -131,7 +131,9 @@ osmo_py_library(
     name = "standalone_executor",
     srcs = ["standalone_executor.py"],
     deps = [
+        requirement("jinja2"),
         requirement("pyyaml"),
+        "//src/lib/utils:workflow",
         "//src/utils/job",
     ],
     visibility = ["//visibility:public"],
diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index f1ff8d519..bfe718841 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -34,6 +34,8 @@
     StandaloneExecutor,
     TaskNode,
     TaskResult,
+    _expand_jinja_locally,
+    _spec_has_templates,
 )
 
 
@@ -61,9 +63,11 @@ class ComposeExecutor(StandaloneExecutor):
     """
 
     def __init__(self, work_dir: str, keep_work_dir: bool = False,
-                 compose_cmd: str = 'docker compose', shm_size: str | None = None):
+                 compose_cmd: str = 'docker compose', shm_size: str | None = None,
+                 credentials: Dict[str, str] | None = None):
         super().__init__(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                         docker_cmd='docker', shm_size=shm_size)
+                         docker_cmd='docker', shm_size=shm_size,
+                         credentials=credentials)
         self._compose_cmd = compose_cmd
 
     @property
@@ -177,14 +181,20 @@ def _validate_for_compose(self, spec: workflow_module.WorkflowSpec):
                             f'Task "{task_spec.name}": URL inputs require network/storage access')
 
                 for output in task_spec.outputs:
-                    if isinstance(output, (task_module.DatasetInputOutput,
-                                           task_module.URLInputOutput)):
+                    if isinstance(output, task_module.URLInputOutput):
                         unsupported_features.append(
-                            f'Task "{task_spec.name}": dataset/URL outputs require object storage')
-
-                if task_spec.credentials:
-                    unsupported_features.append(
-                        f'Task "{task_spec.name}": credentials require the OSMO secret manager')
+                            f'Task "{task_spec.name}": URL outputs require object storage')
+                    elif isinstance(output, task_module.DatasetInputOutput):
+                        logger.info(
+                            'Task "%s": dataset output "%s" ignored in docker-compose mode '
+                            '— data is available in the work directory',
+                            task_spec.name, output.dataset.name)
+
+                for cred_name in task_spec.credentials:
+                    if cred_name not in self._credentials:
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": credential "{cred_name}" not provided. '
+                            f'Use --credential {cred_name}=/path/to/dir')
 
                 if task_spec.checkpoint:
                     unsupported_features.append(
@@ -355,6 +365,11 @@ def _build_compose_service(self, node: TaskNode,
                 os.path.join(files_dir, file_spec.path.lstrip('/')))
             volumes.append(f'{host_path}:{file_spec.path}:ro')
 
+        for cred_name, cred_mount in task_spec.credentials.items():
+            if isinstance(cred_mount, str) and cred_name in self._credentials:
+                local_dir = os.path.abspath(self._credentials[cred_name])
+                volumes.append(f'{local_dir}:{cred_mount}:ro')
+
         if volumes:
             service['volumes'] = volumes
 
@@ -500,17 +515,17 @@ def _compose_cleanup(self, spec: workflow_module.WorkflowSpec):
 def run_workflow_compose(spec_path: str, work_dir: str | None = None,
                          keep_work_dir: bool = False,
                          compose_cmd: str = 'docker compose',
-                         shm_size: str | None = None) -> bool:
+                         shm_size: str | None = None,
+                         set_variables: List[str] | None = None,
+                         set_string_variables: List[str] | None = None,
+                         credentials: Dict[str, str] | None = None) -> bool:
     """Load a workflow spec and execute it via Docker Compose."""
     with open(spec_path, encoding='utf-8') as f:
         spec_text = f.read()
 
-    template_markers = ('{%', '{#', 'default-values')
-    if any(marker in spec_text for marker in template_markers):
-        raise ValueError(
-            'This spec uses Jinja templates which require server-side expansion.\n'
-            'Run "osmo workflow submit --dry-run -f <spec>" first to get the '
-            'expanded spec,\nthen save that output and run it with docker-compose.')
+    if _spec_has_templates(spec_text):
+        logger.info('Spec contains Jinja templates — expanding locally')
+        spec_text = _expand_jinja_locally(spec_text, set_variables, set_string_variables)
 
     created_work_dir = work_dir is None
     if work_dir is None:
@@ -520,7 +535,8 @@ def run_workflow_compose(spec_path: str, work_dir: str | None = None,
     success = False
     try:
         executor = ComposeExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                                    compose_cmd=compose_cmd, shm_size=shm_size)
+                                    compose_cmd=compose_cmd, shm_size=shm_size,
+                                    credentials=credentials)
         spec = executor.load_spec(spec_text)
         success = executor.execute(spec)
     finally:
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index a33dd9b84..1d8f20d29 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -17,6 +17,7 @@
 """
 
 import dataclasses
+import hashlib
 import json
 import logging
 import os
@@ -24,10 +25,13 @@
 import shutil
 import subprocess
 import tempfile
-from typing import Dict, List, Set
+from typing import Any, Dict, List, Set
 
+import jinja2
+import jinja2.sandbox
 import yaml
 
+from src.lib.utils import workflow as workflow_utils
 from src.utils.job import task as task_module
 from src.utils.job import workflow as workflow_module
 
@@ -81,12 +85,14 @@ class StandaloneExecutor:
     DEFAULT_SHM_SIZE = '16g'
 
     def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str = 'docker',
-                 shm_size: str | None = None):
+                 shm_size: str | None = None,
+                 credentials: Dict[str, str] | None = None):
         """Initialize the executor with a work directory, cleanup preference, and container runtime command."""
         self._work_dir = work_dir
         self._keep_work_dir = keep_work_dir
         self._docker_cmd = docker_cmd
         self._shm_size = shm_size
+        self._credentials = credentials or {}
         self._task_nodes: Dict[str, TaskNode] = {}
         self._group_specs: Dict[str, task_module.TaskGroupSpec] = {}
         self._results: Dict[str, TaskResult] = {}
@@ -326,7 +332,7 @@ def visit(name: str) -> List[str] | None:
     _HOST_TOKEN_PATTERN = re.compile(r'\{\{\s*host:[^}]+\}\}')
 
     def _validate_for_standalone(self, spec: workflow_module.WorkflowSpec):
-        """Raise ValueError if the spec uses features unsupported in standalone mode (datasets, URLs, credentials, etc.)."""
+        """Raise ValueError if the spec uses features unsupported in standalone mode."""
         unsupported_features = []
         for group in self._groups(spec):
             for task_spec in group.tasks:
@@ -339,13 +345,20 @@ def _validate_for_standalone(self, spec: workflow_module.WorkflowSpec):
                             f'Task "{task_spec.name}": URL inputs require network/storage access')
 
                 for output in task_spec.outputs:
-                    if isinstance(output, (task_module.DatasetInputOutput, task_module.URLInputOutput)):
+                    if isinstance(output, task_module.URLInputOutput):
                         unsupported_features.append(
-                            f'Task "{task_spec.name}": dataset/URL outputs require object storage')
-
-                if task_spec.credentials:
-                    unsupported_features.append(
-                        f'Task "{task_spec.name}": credentials require the OSMO secret manager')
+                            f'Task "{task_spec.name}": URL outputs require object storage')
+                    elif isinstance(output, task_module.DatasetInputOutput):
+                        logger.info(
+                            'Task "%s": dataset output "%s" ignored in standalone mode '
+                            '— data is available in the work directory',
+                            task_spec.name, output.dataset.name)
+
+                for cred_name in task_spec.credentials:
+                    if cred_name not in self._credentials:
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": credential "{cred_name}" not provided. '
+                            f'Use --credential {cred_name}=/path/to/dir')
 
                 if task_spec.checkpoint:
                     unsupported_features.append(
@@ -499,6 +512,11 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             host_path = os.path.realpath(os.path.join(files_dir, file_spec.path.lstrip('/')))
             docker_args += ['-v', f'{host_path}:{file_spec.path}:ro']
 
+        for cred_name, cred_mount in task_spec.credentials.items():
+            if isinstance(cred_mount, str) and cred_name in self._credentials:
+                local_dir = os.path.abspath(self._credentials[cred_name])
+                docker_args += ['-v', f'{local_dir}:{cred_mount}:ro']
+
         if resolved_command:
             docker_args += ['--entrypoint', resolved_command[0]]
         docker_args.append(task_spec.image)
@@ -556,8 +574,66 @@ def _check_unresolved_tokens(self, task_name: str, resolved_fields: List[str]):
         if unresolved:
             raise ValueError(
                 f'Task "{task_name}" has unresolved token(s): {", ".join(unresolved)}. '
-                f'If this spec uses Jinja templates, run "osmo workflow submit --dry-run -f <spec>" '
-                f'first to expand them.')
+                f'Use --set to provide values, or check for typos in template variable names.')
+
+
+_OSMO_TOKEN_PATTERN = re.compile(r'\{\{(uuid|workflow_id|output|input:[^}]+|host:[^}]+)\}\}')
+
+
+def _expand_jinja_locally(spec_text: str,
+                          set_variables: List[str] | None = None,
+                          set_string_variables: List[str] | None = None) -> str:
+    """Expand Jinja templates in a workflow spec using its default-values section and CLI overrides.
+
+    Mirrors the server-side logic in TemplateSpec.load_template_with_variables but runs
+    entirely locally: no PostgreSQL, no sandboxed worker pool.  OSMO-specific tokens
+    ({{output}}, {{input:...}}, {{host:...}}, {{uuid}}, {{workflow_id}}) are protected
+    from expansion and restored afterward.
+    """
+    file_text, default_values = workflow_utils.parse_workflow_spec(spec_text)
+    template_data: Dict[str, Any] = {}
+    if default_values:
+        template_data = default_values
+
+    for data in (set_variables or []):
+        if '=' not in data:
+            raise ValueError(f'--set value "{data}" is incorrectly formatted (expected key=value)')
+        key, raw_value = data.split('=', 1)
+        try:
+            template_data[key] = int(raw_value)
+        except ValueError:
+            try:
+                template_data[key] = float(raw_value)
+            except ValueError:
+                template_data[key] = raw_value
+
+    for data in (set_string_variables or []):
+        if '=' not in data:
+            raise ValueError(
+                f'--set-string value "{data}" is incorrectly formatted (expected key=value)')
+        key, raw_value = data.split('=', 1)
+        template_data[key] = raw_value
+
+    placeholder_map: Dict[str, str] = {}
+    for match in _OSMO_TOKEN_PATTERN.finditer(file_text):
+        field = match.group(1).strip()
+        hash_key = 'hash' + str(int(hashlib.md5(field.encode('utf-8')).hexdigest(), 16))
+        original_token = '{{' + match.group(1) + '}}'
+        template_data[hash_key] = original_token
+        placeholder_map[original_token] = hash_key
+
+    protected_text = file_text
+    for original_token, hash_key in placeholder_map.items():
+        protected_text = protected_text.replace(original_token, '{{' + hash_key + '}}')
+
+    jinja_env = jinja2.sandbox.SandboxedEnvironment(undefined=jinja2.StrictUndefined)
+    template = jinja_env.from_string(protected_text)
+    return template.render(template_data)
+
+
+def _spec_has_templates(spec_text: str) -> bool:
+    """Return True if the spec contains Jinja template markers that need expansion."""
+    return any(marker in spec_text for marker in ('{%', '{#', 'default-values'))
 
 
 def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
@@ -565,7 +641,10 @@ def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
                             resume: bool = False,
                             from_step: str | None = None,
                             docker_cmd: str = 'docker',
-                            shm_size: str | None = None) -> bool:
+                            shm_size: str | None = None,
+                            set_variables: List[str] | None = None,
+                            set_string_variables: List[str] | None = None,
+                            credentials: Dict[str, str] | None = None) -> bool:
     """Load a workflow spec from disk and execute it in standalone mode via Docker, managing the work directory lifecycle."""
     if (resume or from_step) and work_dir is None:
         raise ValueError(
@@ -574,12 +653,9 @@ def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
     with open(spec_path, encoding='utf-8') as f:
         spec_text = f.read()
 
-    template_markers = ('{%', '{#', 'default-values')
-    if any(marker in spec_text for marker in template_markers):
-        raise ValueError(
-            'This spec uses Jinja templates which require server-side expansion.\n'
-            'Run "osmo workflow submit --dry-run -f <spec>" first to get the expanded spec,\n'
-            'then save that output and run it standalone.')
+    if _spec_has_templates(spec_text):
+        logger.info('Spec contains Jinja templates — expanding locally')
+        spec_text = _expand_jinja_locally(spec_text, set_variables, set_string_variables)
 
     created_work_dir = work_dir is None
     if work_dir is None:
@@ -589,7 +665,8 @@ def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
     success = False
     try:
         executor = StandaloneExecutor(work_dir=work_dir, keep_work_dir=keep_work_dir,
-                                       docker_cmd=docker_cmd, shm_size=shm_size)
+                                       docker_cmd=docker_cmd, shm_size=shm_size,
+                                       credentials=credentials)
         spec = executor.load_spec(spec_text)
         success = executor.execute(spec, resume=resume or from_step is not None,
                                    from_step=from_step)

From 612fa2fe19b889f6a04f497ddba08fe19f7b91ff Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 17:05:40 -0700
Subject: [PATCH 26/35] Enhance credential handling in Compose and Standalone
 Executors

- Updated credential processing to detect dict-style credential mappings and reject them with a clear error, since only NAME=/path credentials are supported by the Compose and Standalone executors.
- Improved error messages for missing credentials, guiding users on the correct format for credential specification.
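- Adjusted cleanup logic in `run_workflow_compose` and `run_workflow_standalone` so that an auto-created work directory is removed only after a successful run (and only when it was not requested to be kept); on failure it is preserved for debugging.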
---
 src/utils/compose_executor.py    | 16 +++++++++++++---
 src/utils/standalone_executor.py | 28 ++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index bfe718841..510efcf53 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -190,8 +190,13 @@ def _validate_for_compose(self, spec: workflow_module.WorkflowSpec):
                             '— data is available in the work directory',
                             task_spec.name, output.dataset.name)
 
-                for cred_name in task_spec.credentials:
-                    if cred_name not in self._credentials:
+                for cred_name, cred_mount in task_spec.credentials.items():
+                    if isinstance(cred_mount, dict):
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": credential "{cred_name}" uses '
+                            f'dict-style mapping which is not supported in docker-compose '
+                            f'mode; provide credentials as NAME=/path')
+                    elif cred_name not in self._credentials:
                         unsupported_features.append(
                             f'Task "{task_spec.name}": credential "{cred_name}" not provided. '
                             f'Use --credential {cred_name}=/path/to/dir')
@@ -366,6 +371,11 @@ def _build_compose_service(self, node: TaskNode,
             volumes.append(f'{host_path}:{file_spec.path}:ro')
 
         for cred_name, cred_mount in task_spec.credentials.items():
+            if isinstance(cred_mount, dict):
+                raise ValueError(
+                    f'Task "{node.name}": credential "{cred_name}" uses dict-style '
+                    f'mapping which is not supported in docker-compose mode; '
+                    f'provide credentials as NAME=/path')
             if isinstance(cred_mount, str) and cred_name in self._credentials:
                 local_dir = os.path.abspath(self._credentials[cred_name])
                 volumes.append(f'{local_dir}:{cred_mount}:ro')
@@ -540,7 +550,7 @@ def run_workflow_compose(spec_path: str, work_dir: str | None = None,
         spec = executor.load_spec(spec_text)
         success = executor.execute(spec)
     finally:
-        if created_work_dir and not keep_work_dir:
+        if created_work_dir and not keep_work_dir and success:
             shutil.rmtree(work_dir, ignore_errors=True)
         elif not success:
             logger.info('Work directory preserved for debugging: %s', work_dir)
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 1d8f20d29..0a36a24cd 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -74,11 +74,12 @@ class StandaloneExecutor:
       - `environment:` passed as Docker env vars
       - Task-to-task data flow via shared local directories
       - GPU passthrough via --gpus for tasks that declare gpu > 0 in resources
+      - Credentials via --credential NAME=/path (mounted read-only in _run_task)
+      - Jinja-templated specs (expanded locally via _expand_jinja_locally)
 
     Does NOT support (raises clear errors):
       - Dataset / URL inputs/outputs (require object storage)
-      - Credentials, checkpoints, volumeMounts (require cluster infra)
-      - Templated specs with Jinja (require server-side expansion; use --dry-run first)
+      - Checkpoints, volumeMounts (require cluster infra)
       - {{host:taskname}} tokens (require parallel containers with shared networking)
     """
 
@@ -354,8 +355,14 @@ def _validate_for_standalone(self, spec: workflow_module.WorkflowSpec):
                             '— data is available in the work directory',
                             task_spec.name, output.dataset.name)
 
-                for cred_name in task_spec.credentials:
-                    if cred_name not in self._credentials:
+                for cred_name, cred_mount in task_spec.credentials.items():
+                    if isinstance(cred_mount, dict):
+                        unsupported_features.append(
+                            f'Task "{task_spec.name}": credential "{cred_name}" uses '
+                            f'dict-style mapping which the standalone executor does not '
+                            f'support; provide credentials as NAME=/path or flatten the '
+                            f'mapping')
+                    elif cred_name not in self._credentials:
                         unsupported_features.append(
                             f'Task "{task_spec.name}": credential "{cred_name}" not provided. '
                             f'Use --credential {cred_name}=/path/to/dir')
@@ -513,6 +520,11 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
             docker_args += ['-v', f'{host_path}:{file_spec.path}:ro']
 
         for cred_name, cred_mount in task_spec.credentials.items():
+            if isinstance(cred_mount, dict):
+                raise ValueError(
+                    f'Task "{node.name}": credential "{cred_name}" uses dict-style '
+                    f'mapping which the standalone executor does not support; '
+                    f'provide credentials as NAME=/path or flatten the mapping')
             if isinstance(cred_mount, str) and cred_name in self._credentials:
                 local_dir = os.path.abspath(self._credentials[cred_name])
                 docker_args += ['-v', f'{local_dir}:{cred_mount}:ro']
@@ -577,7 +589,7 @@ def _check_unresolved_tokens(self, task_name: str, resolved_fields: List[str]):
                 f'Use --set to provide values, or check for typos in template variable names.')
 
 
-_OSMO_TOKEN_PATTERN = re.compile(r'\{\{(uuid|workflow_id|output|input:[^}]+|host:[^}]+)\}\}')
+_OSMO_TOKEN_PATTERN = re.compile(r'\{\{\s*(uuid|workflow_id|output|input:[^}]+|host:[^}]+)\s*\}\}')
 
 
 def _expand_jinja_locally(spec_text: str,
@@ -618,7 +630,7 @@ def _expand_jinja_locally(spec_text: str,
     for match in _OSMO_TOKEN_PATTERN.finditer(file_text):
         field = match.group(1).strip()
         hash_key = 'hash' + str(int(hashlib.md5(field.encode('utf-8')).hexdigest(), 16))
-        original_token = '{{' + match.group(1) + '}}'
+        original_token = match.group(0)
         template_data[hash_key] = original_token
         placeholder_map[original_token] = hash_key
 
@@ -633,7 +645,7 @@ def _expand_jinja_locally(spec_text: str,
 
 def _spec_has_templates(spec_text: str) -> bool:
     """Return True if the spec contains Jinja template markers that need expansion."""
-    return any(marker in spec_text for marker in ('{%', '{#', 'default-values'))
+    return any(marker in spec_text for marker in ('{{', '{%', '{#', 'default-values'))
 
 
 def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
@@ -671,7 +683,7 @@ def run_workflow_standalone(spec_path: str, work_dir: str | None = None,
         success = executor.execute(spec, resume=resume or from_step is not None,
                                    from_step=from_step)
     finally:
-        if created_work_dir and not keep_work_dir:
+        if created_work_dir and not keep_work_dir and success:
             shutil.rmtree(work_dir, ignore_errors=True)
         elif not success:
             logger.info('Work directory preserved for debugging: %s', work_dir)

From adad9d7c185150b617543415c1ea67ace4840bb7 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 17:44:44 -0700
Subject: [PATCH 27/35] Enhance workflow fingerprinting in StandaloneExecutor

- Introduced a new method `_compute_workflow_fingerprint` to generate a SHA-256 hash of task specifications, allowing for detection of workflow changes across runs.
- Updated the state management to include the workflow fingerprint, enabling verification of spec consistency between runs and warning users of potential stale outputs.
- Improved GPU device specification formatting in Docker arguments to ensure correct syntax without unnecessary quotes.
---
 src/utils/compose_executor.py    |  4 +++-
 src/utils/standalone_executor.py | 41 ++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index 510efcf53..c0be7b1b0 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -75,7 +75,9 @@ def _compose_file_path(self) -> str:
         return os.path.join(self._work_dir, COMPOSE_FILE_NAME)
 
     def _compose_project_name(self, spec: workflow_module.WorkflowSpec) -> str:
-        return f'osmo-{re.sub(r"[^a-z0-9-]", "-", spec.name.lower())}'
+        sanitized = re.sub(r'[^a-z0-9-]', '-', spec.name.lower())
+        sanitized = re.sub(r'-{2,}', '-', sanitized).strip('-')
+        return f'osmo-{sanitized}' if sanitized else 'osmo-default'
 
     def _compose_base_cmd(self, spec: workflow_module.WorkflowSpec) -> List[str]:
         return (
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 0a36a24cd..47164d16a 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -98,6 +98,7 @@ def __init__(self, work_dir: str, keep_work_dir: bool = False, docker_cmd: str =
         self._group_specs: Dict[str, task_module.TaskGroupSpec] = {}
         self._results: Dict[str, TaskResult] = {}
         self._available_gpus: int | None = None
+        self._workflow_fingerprint: str = ''
 
     def _detect_available_gpus(self) -> int:
         """Query nvidia-smi to count available GPUs, caching the result for subsequent calls."""
@@ -122,6 +123,23 @@ def _detect_available_gpus(self) -> int:
             self._available_gpus = 0
         return self._available_gpus
 
+    def _compute_workflow_fingerprint(self) -> str:
+        """Compute a deterministic SHA-256 hash from task specs to detect workflow changes across runs."""
+        fingerprint_data: List[Dict[str, Any]] = []
+        for name in sorted(self._task_nodes):
+            spec = self._task_nodes[name].spec
+            fingerprint_data.append({
+                'name': name,
+                'image': spec.image,
+                'command': spec.command,
+                'args': spec.args,
+                'environment': dict(sorted(spec.environment.items())),
+                'inputs': [str(i) for i in spec.inputs],
+                'resource': spec.resource,
+            })
+        blob = json.dumps(fingerprint_data, sort_keys=True, separators=(',', ':'))
+        return hashlib.sha256(blob.encode('utf-8')).hexdigest()
+
     def load_spec(self, spec_text: str) -> workflow_module.WorkflowSpec:
         """Parse raw YAML text into a validated WorkflowSpec via the versioned spec model."""
         raw = yaml.safe_load(spec_text)
@@ -137,6 +155,7 @@ def execute(self, spec: workflow_module.WorkflowSpec,
         """Run all tasks in topological order, returning True if the entire workflow succeeds."""
         self._results.clear()
         self._build_dag(spec)
+        self._workflow_fingerprint = self._compute_workflow_fingerprint()
         self._validate_for_standalone(spec)
         self._setup_directories()
 
@@ -202,12 +221,13 @@ def _state_file_path(self) -> str:
 
     def _save_state(self):
         """Persist current task results to the state file so runs can be resumed later."""
-        state = {
+        state: Dict[str, Any] = {
+            'workflow_fingerprint': self._workflow_fingerprint,
             'tasks': {
                 name: {'exit_code': result.exit_code, 'output_dir': result.output_dir}
                 for name, result in self._results.items()
                 if result.exit_code != -1
-            }
+            },
         }
         with open(self._state_file_path, 'w', encoding='utf-8') as f:
             json.dump(state, f, indent=2)
@@ -226,6 +246,17 @@ def _restore_completed_tasks(self, from_step: str | None = None):
             logger.info('No previous state found — starting from scratch')
             return
 
+        saved_fingerprint = state.get('workflow_fingerprint')
+        if not saved_fingerprint:
+            logger.warning(
+                'State file has no workflow fingerprint — cannot verify '
+                'that the spec matches the previous run; reused outputs may be stale')
+        elif saved_fingerprint != self._workflow_fingerprint:
+            logger.warning(
+                'Workflow spec has changed since the previous run '
+                '(fingerprint %s → %s); reused outputs may be stale',
+                saved_fingerprint[:12], self._workflow_fingerprint[:12])
+
         completed: Dict[str, Dict] = {}
         for name, info in state.get('tasks', {}).items():
             if name not in self._task_nodes:
@@ -496,16 +527,16 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 logger.warning(
                     'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
                     node.name, gpu_count, available, available)
-                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(available))}"']
+                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(available))}']
             else:
-                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
+                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(gpu_count))}']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
             docker_args += ['--shm-size', self._shm_size or self.DEFAULT_SHM_SIZE]
         elif self._shm_size:
             docker_args += ['--shm-size', self._shm_size]
 
-        for env_key, resolved_value in zip(task_spec.environment.keys(), resolved_env_values):
+        for env_key, resolved_value in zip(task_spec.environment.keys(), resolved_env_values, strict=True):
             docker_args += ['-e', f'{env_key}={resolved_value}']
 
         docker_args += ['-v', f'{output_dir}:{CONTAINER_DATA_PATH}/output']

From e778e58aed600f09274c6896f4b6bab8bf610e12 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 18:18:31 -0700
Subject: [PATCH 28/35] Improve error handling and logging in Compose and
 Standalone Executors

- Added error logging for JSON parsing failures in `ComposeExecutor` to aid in debugging.
- Enhanced warning messages in `ComposeExecutor` to include the full Docker compose output when no container info is found.
- Updated `StandaloneExecutor` to use `deque` for improved performance in task processing queues.
---
 src/utils/compose_executor.py    |  5 ++++-
 src/utils/standalone_executor.py | 14 ++++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index c0be7b1b0..e81f1ea88 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -497,6 +497,7 @@ def _get_service_exit_code(self, service_name: str,
                 try:
                     container_info = json.loads(line)
                 except json.JSONDecodeError:
+                    logger.error('Failed to parse container info line as JSON: %s', line, exc_info=True)
                     continue
                 if isinstance(container_info, list):
                     for entry in container_info:
@@ -505,7 +506,9 @@ def _get_service_exit_code(self, service_name: str,
                 elif container_info.get('Service') == service_name:
                     return container_info.get('ExitCode', 1)
 
-            logger.warning('No container info found for service "%s"', service_name)
+            logger.warning(
+                'No container info found for service "%s" in docker compose output:\n%s',
+                service_name, result.stdout.strip())
             return 1
         except (subprocess.TimeoutExpired, FileNotFoundError):
             logger.warning('Could not determine exit code for "%s"', service_name)
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 47164d16a..290287185 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -25,6 +25,7 @@
 import shutil
 import subprocess
 import tempfile
+from collections import deque
 from typing import Any, Dict, List, Set
 
 import jinja2
@@ -128,6 +129,10 @@ def _compute_workflow_fingerprint(self) -> str:
         fingerprint_data: List[Dict[str, Any]] = []
         for name in sorted(self._task_nodes):
             spec = self._task_nodes[name].spec
+            files_data = [
+                {'path': f.path, 'contents': f.contents, 'base64': f.base64}
+                for f in sorted(spec.files, key=lambda f: f.path)
+            ] if spec.files else []
             fingerprint_data.append({
                 'name': name,
                 'image': spec.image,
@@ -136,6 +141,7 @@ def _compute_workflow_fingerprint(self) -> str:
                 'environment': dict(sorted(spec.environment.items())),
                 'inputs': [str(i) for i in spec.inputs],
                 'resource': spec.resource,
+                'files': files_data,
             })
         blob = json.dumps(fingerprint_data, sort_keys=True, separators=(',', ':'))
         return hashlib.sha256(blob.encode('utf-8')).hexdigest()
@@ -280,9 +286,9 @@ def _restore_completed_tasks(self, from_step: str | None = None):
     def _get_downstream_tasks(self, task_name: str) -> Set[str]:
         """Return all transitive downstream dependents of the given task via BFS."""
         visited: Set[str] = set()
-        queue = [task_name]
+        queue: deque[str] = deque([task_name])
         while queue:
-            current = queue.pop(0)
+            current = queue.popleft()
             for downstream in self._task_nodes[current].downstream:
                 if downstream not in visited:
                     visited.add(downstream)
@@ -466,9 +472,9 @@ def _find_ready_tasks(self) -> List[str]:
     def _cancel_downstream(self, failed_task: str):
         """Mark all transitive downstream tasks of a failed task as cancelled (exit_code -1)."""
         visited: Set[str] = set()
-        queue = [failed_task]
+        queue: deque[str] = deque([failed_task])
         while queue:
-            current = queue.pop(0)
+            current = queue.popleft()
             for downstream in self._task_nodes[current].downstream:
                 if downstream not in visited and downstream not in self._results:
                     visited.add(downstream)

From f6df82335cba2d2a4da7af6d55bbc65836de9140 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 18:53:40 -0700
Subject: [PATCH 29/35] Refactor Compose and Standalone Executors for improved
 command handling and state management

- Replaced string splitting with `shlex.split` in `ComposeExecutor` to ensure correct parsing of command arguments.
- Enhanced `_compute_workflow_fingerprint` in `StandaloneExecutor` to accept `WorkflowSpec` directly, improving clarity and functionality.
- Updated state file handling in `StandaloneExecutor` to use a temporary file for safer writes and added error handling for corrupt state files.
---
 src/utils/compose_executor.py    |  3 +-
 src/utils/standalone_executor.py | 58 +++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/src/utils/compose_executor.py b/src/utils/compose_executor.py
index e81f1ea88..97f490f48 100644
--- a/src/utils/compose_executor.py
+++ b/src/utils/compose_executor.py
@@ -20,6 +20,7 @@
 import logging
 import os
 import re
+import shlex
 import shutil
 import subprocess
 import tempfile
@@ -81,7 +82,7 @@ def _compose_project_name(self, spec: workflow_module.WorkflowSpec) -> str:
 
     def _compose_base_cmd(self, spec: workflow_module.WorkflowSpec) -> List[str]:
         return (
-            self._compose_cmd.split()
+            shlex.split(self._compose_cmd)
             + ['-p', self._compose_project_name(spec), '-f', self._compose_file_path]
         )
 
diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 290287185..6d608cccb 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -25,6 +25,7 @@
 import shutil
 import subprocess
 import tempfile
+import time
 from collections import deque
 from typing import Any, Dict, List, Set
 
@@ -124,23 +125,28 @@ def _detect_available_gpus(self) -> int:
             self._available_gpus = 0
         return self._available_gpus
 
-    def _compute_workflow_fingerprint(self) -> str:
+    def _compute_workflow_fingerprint(self, spec: workflow_module.WorkflowSpec) -> str:
         """Compute a deterministic SHA-256 hash from task specs to detect workflow changes across runs."""
         fingerprint_data: List[Dict[str, Any]] = []
         for name in sorted(self._task_nodes):
-            spec = self._task_nodes[name].spec
+            task_spec = self._task_nodes[name].spec
             files_data = [
                 {'path': f.path, 'contents': f.contents, 'base64': f.base64}
-                for f in sorted(spec.files, key=lambda f: f.path)
-            ] if spec.files else []
+                for f in sorted(task_spec.files, key=lambda f: f.path)
+            ] if task_spec.files else []
+            named_resource = spec.resources.get(task_spec.resource)
+            named_resource_dict = named_resource.model_dump(exclude_none=True) if named_resource else {}
+            inline_resource_dict = task_spec.resources.model_dump(exclude_defaults=True)
+            effective_resource = {**named_resource_dict, **inline_resource_dict}
             fingerprint_data.append({
                 'name': name,
-                'image': spec.image,
-                'command': spec.command,
-                'args': spec.args,
-                'environment': dict(sorted(spec.environment.items())),
-                'inputs': [str(i) for i in spec.inputs],
-                'resource': spec.resource,
+                'image': task_spec.image,
+                'command': task_spec.command,
+                'args': task_spec.args,
+                'environment': dict(sorted(task_spec.environment.items())),
+                'inputs': [str(i) for i in task_spec.inputs],
+                'resource': task_spec.resource,
+                'resource_config': effective_resource,
                 'files': files_data,
             })
         blob = json.dumps(fingerprint_data, sort_keys=True, separators=(',', ':'))
@@ -161,7 +167,7 @@ def execute(self, spec: workflow_module.WorkflowSpec,
         """Run all tasks in topological order, returning True if the entire workflow succeeds."""
         self._results.clear()
         self._build_dag(spec)
-        self._workflow_fingerprint = self._compute_workflow_fingerprint()
+        self._workflow_fingerprint = self._compute_workflow_fingerprint(spec)
         self._validate_for_standalone(spec)
         self._setup_directories()
 
@@ -235,15 +241,37 @@ def _save_state(self):
                 if result.exit_code != -1
             },
         }
-        with open(self._state_file_path, 'w', encoding='utf-8') as f:
+        tmp_path = self._state_file_path + '.tmp'
+        with open(tmp_path, 'w', encoding='utf-8') as f:
             json.dump(state, f, indent=2)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, self._state_file_path)
+        state_dir = os.path.dirname(self._state_file_path) or '.'
+        dir_fd = os.open(state_dir, os.O_RDONLY)
+        try:
+            os.fsync(dir_fd)
+        finally:
+            os.close(dir_fd)
 
     def _load_state(self) -> Dict | None:
-        """Load previously saved task state from disk, returning None if no state file exists."""
+        """Load previously saved task state from disk, returning None if no state file exists or if the file is corrupt."""
         if not os.path.exists(self._state_file_path):
             return None
-        with open(self._state_file_path, encoding='utf-8') as f:
-            return json.load(f)
+        try:
+            with open(self._state_file_path, encoding='utf-8') as f:
+                return json.load(f)
+        except (json.JSONDecodeError, IOError) as exc:
+            corrupt_path = f'{self._state_file_path}.corrupt.{int(time.time())}'
+            try:
+                os.rename(self._state_file_path, corrupt_path)
+                logger.warning(
+                    'State file is corrupt (%s); renamed to %s and starting fresh',
+                    exc, corrupt_path)
+            except OSError:
+                logger.warning(
+                    'State file is corrupt (%s); starting fresh', exc)
+            return None
 
     def _restore_completed_tasks(self, from_step: str | None = None):
         """Reload completed tasks from a previous run, optionally invalidating from a given step onward."""

From e9647a6961191c1c33033f87d8579521c5531a38 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 19:30:14 -0700
Subject: [PATCH 30/35] Refactor hash generation in StandaloneExecutor and
 improve cycle detection logic

- Updated the cycle detection logic to use list unpacking for clarity in `StandaloneExecutor`.
- Changed hash generation from MD5 to SHA-256 for improved security in `_expand_jinja_locally`, ensuring better uniqueness and collision resistance.
---
 src/utils/standalone_executor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 6d608cccb..5486386d5 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -377,7 +377,7 @@ def visit(name: str) -> List[str] | None:
                 return None
             if state[name] == IN_PROGRESS:
                 cycle_start = path.index(name)
-                return path[cycle_start:] + [name]
+                return [*path[cycle_start:], name]
 
             state[name] = IN_PROGRESS
             path.append(name)
@@ -694,7 +694,7 @@ def _expand_jinja_locally(spec_text: str,
     placeholder_map: Dict[str, str] = {}
     for match in _OSMO_TOKEN_PATTERN.finditer(file_text):
         field = match.group(1).strip()
-        hash_key = 'hash' + str(int(hashlib.md5(field.encode('utf-8')).hexdigest(), 16))
+        hash_key = 'hash' + str(int(hashlib.sha256(field.encode('utf-8')).hexdigest(), 16))
         original_token = match.group(0)
         template_data[hash_key] = original_token
         placeholder_map[original_token] = hash_key

From 3c91e1d0f9fb87fbfcc1d4fbd2feff19de372610 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 19:41:04 -0700
Subject: [PATCH 31/35] Refactor file writing logic in StandaloneExecutor to
 support base64 encoding

- Introduced base64 decoding for file writing in `StandaloneExecutor`, allowing for proper handling of encoded content.
- Ensured cleanup of rerun output directories is executed on every run, not only when resuming tasks, improving state management.
---
 src/utils/standalone_executor.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 5486386d5..05757e6c1 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -16,6 +16,7 @@
 SPDX-License-Identifier: Apache-2.0
 """
 
+import base64 as base64_module
 import dataclasses
 import hashlib
 import json
@@ -173,7 +174,8 @@ def execute(self, spec: workflow_module.WorkflowSpec,
 
         if resume or from_step:
             self._restore_completed_tasks(from_step)
-            self._clean_rerun_output_dirs()
+
+        self._clean_rerun_output_dirs()
 
         total_tasks = sum(len(g.tasks) for g in self._groups(spec))
         skipped = len(self._results)
@@ -537,8 +539,12 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 raise ValueError(
                     f'Task "{node.name}": file path "{file_spec.path}" escapes the task directory')
             os.makedirs(os.path.dirname(host_path), exist_ok=True)
-            with open(host_path, 'w', encoding='utf-8') as f:
-                f.write(resolved_contents)
+            if file_spec.base64:
+                with open(host_path, 'wb') as f:
+                    f.write(base64_module.b64decode(resolved_contents))
+            else:
+                with open(host_path, 'w', encoding='utf-8') as f:
+                    f.write(resolved_contents)
 
         resolved_command = [self._substitute_tokens(c, token_map) for c in task_spec.command]
         resolved_args = [self._substitute_tokens(a, token_map) for a in task_spec.args]

From 0bd4a99ee24cff9fab317886d8cdd883ae83d375 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Fri, 10 Apr 2026 21:50:47 -0700
Subject: [PATCH 32/35] Enhance credential handling in StandaloneExecutor

- Added each task's credential mappings and their absolute source paths to `_compute_workflow_fingerprint`, so that credential changes are reflected in the workflow fingerprint.
- Updated `_validate_for_standalone` to raise a ValueError for unsupported timeout features in standalone mode, improving error handling and user guidance.
- Moved directory setup in `execute` to run after previously completed tasks are restored, enhancing state management.
---
 src/utils/standalone_executor.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index 05757e6c1..d3ac8a94e 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -139,6 +139,14 @@ def _compute_workflow_fingerprint(self, spec: workflow_module.WorkflowSpec) -> s
             named_resource_dict = named_resource.model_dump(exclude_none=True) if named_resource else {}
             inline_resource_dict = task_spec.resources.model_dump(exclude_defaults=True)
             effective_resource = {**named_resource_dict, **inline_resource_dict}
+            credentials_data = {
+                cred_name: {
+                    'mount_path': cred_mount,
+                    'source': os.path.abspath(self._credentials[cred_name])
+                    if cred_name in self._credentials else None,
+                }
+                for cred_name, cred_mount in sorted(task_spec.credentials.items())
+            } if task_spec.credentials else {}
             fingerprint_data.append({
                 'name': name,
                 'image': task_spec.image,
@@ -149,6 +157,7 @@ def _compute_workflow_fingerprint(self, spec: workflow_module.WorkflowSpec) -> s
                 'resource': task_spec.resource,
                 'resource_config': effective_resource,
                 'files': files_data,
+                'credentials': credentials_data,
             })
         blob = json.dumps(fingerprint_data, sort_keys=True, separators=(',', ':'))
         return hashlib.sha256(blob.encode('utf-8')).hexdigest()
@@ -170,11 +179,11 @@ def execute(self, spec: workflow_module.WorkflowSpec,
         self._build_dag(spec)
         self._workflow_fingerprint = self._compute_workflow_fingerprint(spec)
         self._validate_for_standalone(spec)
-        self._setup_directories()
 
         if resume or from_step:
             self._restore_completed_tasks(from_step)
 
+        self._setup_directories()
         self._clean_rerun_output_dirs()
 
         total_tasks = sum(len(g.tasks) for g in self._groups(spec))
@@ -402,6 +411,12 @@ def visit(name: str) -> List[str] | None:
     def _validate_for_standalone(self, spec: workflow_module.WorkflowSpec):
         """Raise ValueError if the spec uses features unsupported in standalone mode."""
         unsupported_features = []
+
+        if spec.timeout.exec_timeout is not None or spec.timeout.queue_timeout is not None:
+            unsupported_features.append(
+                'WorkflowSpec.timeout is not supported in standalone mode; '
+                'use the service executor or remove the timeout')
+
         for group in self._groups(spec):
             for task_spec in group.tasks:
                 for input_source in task_spec.inputs:

From e782e4de1b8551532f3de1abaaa5c783ea52edfa Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Wed, 27 May 2026 11:58:53 -0700
Subject: [PATCH 33/35] Refactor GPU handling in StandaloneExecutor

- Updated GPU allocation logic to use 'all' when requested GPUs meet or exceed available resources, improving resource management.
- Adjusted logging messages for clarity when running tasks without GPU support or with limited GPU availability.
- Ensured correct formatting for GPU device specifications in Docker arguments.
---
 src/utils/standalone_executor.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index d3ac8a94e..ea8701a3a 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -578,13 +578,14 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                 logger.warning(
                     'Task "%s" requests %d GPU(s) but no GPUs are available — running without GPU support',
                     node.name, gpu_count)
-            elif gpu_count > available:
-                logger.warning(
-                    'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
-                    node.name, gpu_count, available, available)
-                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(available))}']
+            elif gpu_count >= available:
+                if gpu_count > available:
+                    logger.warning(
+                        'Task "%s" requests %d GPU(s) but only %d available — running with %d GPU(s)',
+                        node.name, gpu_count, available, available)
+                docker_args += ['--gpus', 'all']
             else:
-                docker_args += ['--gpus', f'device={",".join(str(i) for i in range(gpu_count))}']
+                docker_args += ['--gpus', f'"device={",".join(str(i) for i in range(gpu_count))}"']
             logger.info('Task "%s" requesting %d GPU(s), using %d', node.name, gpu_count, min(gpu_count, available))
 
             docker_args += ['--shm-size', self._shm_size or self.DEFAULT_SHM_SIZE]

From 1be5606225718167353b5d333c5882c094c6ebcf Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 4 Jun 2026 11:39:31 -0700
Subject: [PATCH 34/35] Refactor file handling and enhance test coverage in
 StandaloneExecutor and ComposeExecutor

- Updated file writing logic in `StandaloneExecutor` to use more descriptive variable names for file handles, improving code readability.
- Enhanced test cases in `TestComposeValidation` and `TestJinjaTemplateDetection` to better validate credential handling and Jinja template expansion, ensuring robustness in workflow specifications.
- Adjusted error messages for clarity in credential validation tests, providing more informative feedback for users.
---
 src/utils/standalone_executor.py            |   8 +-
 src/utils/tests/test_compose_executor.py    |  74 ++++++++-----
 src/utils/tests/test_standalone_executor.py | 116 +++++++++-----------
 3 files changed, 99 insertions(+), 99 deletions(-)

diff --git a/src/utils/standalone_executor.py b/src/utils/standalone_executor.py
index ea8701a3a..7aba35fbf 100644
--- a/src/utils/standalone_executor.py
+++ b/src/utils/standalone_executor.py
@@ -555,11 +555,11 @@ def _run_task(self, node: TaskNode, spec: workflow_module.WorkflowSpec) -> TaskR
                     f'Task "{node.name}": file path "{file_spec.path}" escapes the task directory')
             os.makedirs(os.path.dirname(host_path), exist_ok=True)
             if file_spec.base64:
-                with open(host_path, 'wb') as f:
-                    f.write(base64_module.b64decode(resolved_contents))
+                with open(host_path, 'wb') as binary_file:
+                    binary_file.write(base64_module.b64decode(resolved_contents))
             else:
-                with open(host_path, 'w', encoding='utf-8') as f:
-                    f.write(resolved_contents)
+                with open(host_path, 'w', encoding='utf-8') as text_file:
+                    text_file.write(resolved_contents)
 
         resolved_command = [self._substitute_tokens(c, token_map) for c in task_spec.command]
         resolved_args = [self._substitute_tokens(a, token_map) for a in task_spec.args]
diff --git a/src/utils/tests/test_compose_executor.py b/src/utils/tests/test_compose_executor.py
index e824d2b9a..a2276b011 100644
--- a/src/utils/tests/test_compose_executor.py
+++ b/src/utils/tests/test_compose_executor.py
@@ -32,7 +32,12 @@
     ComposeExecutor,
     run_workflow_compose,
 )
-from src.utils.standalone_executor import CONTAINER_DATA_PATH, TaskResult
+from src.utils.standalone_executor import (
+    CONTAINER_DATA_PATH,
+    TaskResult,
+    _expand_jinja_locally,
+    _spec_has_templates,
+)
 
 
 def _docker_compose_available() -> bool:
@@ -437,8 +442,8 @@ def test_dataset_input_rejected(self):
             executor._validate_for_compose(spec)
         self.assertIn('dataset', str(context.exception))
 
-    def test_credentials_rejected(self):
-        """Credentials are rejected in compose mode."""
+    def test_credential_not_provided_rejected(self):
+        """A credential required by a task but not supplied via --credential is rejected."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: bad
@@ -454,7 +459,25 @@ def test_credentials_rejected(self):
         executor._build_dag(spec)
         with self.assertRaises(ValueError) as context:
             executor._validate_for_compose(spec)
-        self.assertIn('credentials', str(context.exception))
+        self.assertIn('credential', str(context.exception))
+
+    def test_provided_credential_passes(self):
+        """A credential supplied via --credential is accepted in compose mode."""
+        spec_text = textwrap.dedent('''\
+            workflow:
+              name: ok
+              tasks:
+              - name: task
+                image: ubuntu:24.04
+                command: ["echo"]
+                credentials:
+                  my-secret: NGC_API_KEY
+        ''')
+        executor = ComposeExecutor(
+            work_dir='/tmp/unused', credentials={'my-secret': '/tmp/secret-dir'})
+        spec = executor.load_spec(spec_text)
+        executor._build_dag(spec)
+        executor._validate_for_compose(spec)
 
     def test_simple_spec_passes(self):
         """A simple spec with only task-to-task inputs passes compose validation."""
@@ -637,33 +660,26 @@ def test_name_with_special_chars(self):
 
 
 class TestJinjaTemplateDetection(unittest.TestCase):
-    """Verify that Jinja templates are rejected before execution."""
+    """Verify that Jinja templates are detected and expanded locally before execution."""
 
-    def _write_temp_spec(self, content: str) -> str:
-        f = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
-        f.write(content)
-        f.flush()
-        f.close()
-        return f.name
-
-    def test_jinja_block_detected(self):
-        path = self._write_temp_spec(textwrap.dedent('''\
+    def test_jinja_block_expanded(self):
+        """A spec with {% %} Jinja block tags is detected and expanded locally."""
+        spec_text = textwrap.dedent('''\
             workflow:
               name: {% if true %}test{% endif %}
               tasks:
               - name: task
                 image: alpine:3.18
                 command: ["echo"]
-        '''))
-        try:
-            with self.assertRaises(ValueError) as context:
-                run_workflow_compose(path)
-            self.assertIn('Jinja', str(context.exception))
-        finally:
-            os.unlink(path)
+        ''')
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{%', expanded)
+        self.assertIn('name: test', expanded)
 
-    def test_default_values_detected(self):
-        path = self._write_temp_spec(textwrap.dedent('''\
+    def test_default_values_expanded(self):
+        """A spec with a default-values section has its {{ }} variables expanded locally."""
+        spec_text = textwrap.dedent('''\
             workflow:
               name: "{{experiment}}"
               tasks:
@@ -672,13 +688,11 @@ def test_default_values_detected(self):
                 command: ["echo"]
             default-values:
               experiment: test
-        '''))
-        try:
-            with self.assertRaises(ValueError) as context:
-                run_workflow_compose(path)
-            self.assertIn('Jinja', str(context.exception))
-        finally:
-            os.unlink(path)
+        ''')
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{{', expanded)
+        self.assertIn('test', expanded)
 
 
 class TestUnresolvedTokenDetection(unittest.TestCase):
diff --git a/src/utils/tests/test_standalone_executor.py b/src/utils/tests/test_standalone_executor.py
index 2eae253f6..debf61568 100644
--- a/src/utils/tests/test_standalone_executor.py
+++ b/src/utils/tests/test_standalone_executor.py
@@ -25,7 +25,15 @@
 from unittest import mock
 
 from src.utils.job import task as task_module
-from src.utils.standalone_executor import CONTAINER_DATA_PATH, StandaloneExecutor, TaskNode, TaskResult, run_workflow_standalone
+from src.utils.standalone_executor import (
+    CONTAINER_DATA_PATH,
+    StandaloneExecutor,
+    TaskNode,
+    TaskResult,
+    _expand_jinja_locally,
+    _spec_has_templates,
+    run_workflow_standalone,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -784,11 +792,11 @@ def test_url_input_rejected(self):
             executor._validate_for_standalone(spec)
         self.assertIn('URL', str(context.exception))
 
-    def test_dataset_output_rejected(self):
-        """A spec with dataset outputs is rejected as unsupported in standalone mode."""
+    def test_dataset_output_ignored(self):
+        """A spec with dataset outputs passes validation; the output is ignored in standalone mode."""
         spec_text = textwrap.dedent('''\
             workflow:
-              name: bad
+              name: ok
               tasks:
               - name: task
                 image: ubuntu:24.04
@@ -800,9 +808,7 @@ def test_dataset_output_rejected(self):
         executor = self._make_executor()
         spec = executor.load_spec(spec_text)
         executor._build_dag(spec)
-        with self.assertRaises(ValueError) as context:
-            executor._validate_for_standalone(spec)
-        self.assertIn('dataset', str(context.exception).lower())
+        executor._validate_for_standalone(spec)
 
     def test_url_output_rejected(self):
         """A spec with URL outputs is rejected as unsupported in standalone mode."""
@@ -906,7 +912,7 @@ class TestValidateForStandaloneRemainingBranches(unittest.TestCase):
                     credentials:
                       my-secret: NGC_API_KEY
             '''),
-            'expected_substring': 'credentials',
+            'expected_substring': 'credential',
         },
         'checkpoint': {
             'yaml': textwrap.dedent('''\
@@ -1350,8 +1356,8 @@ def test_valid_osmo_tokens_not_flagged(self, mock_run):
         spec = executor.load_spec(spec_text)
         executor.execute(spec)
 
-    def test_error_message_suggests_dry_run(self):
-        """The unresolved token error message suggests using --dry-run to expand templates."""
+    def test_error_message_suggests_set(self):
+        """The unresolved token error message suggests using --set to provide values."""
         spec_text = textwrap.dedent('''\
             workflow:
               name: helpful
@@ -1364,7 +1370,7 @@ def test_error_message_suggests_dry_run(self):
         spec = executor.load_spec(spec_text)
         with self.assertRaises(ValueError) as context:
             executor.execute(spec)
-        self.assertIn('dry-run', str(context.exception))
+        self.assertIn('--set', str(context.exception))
 
 
 class TestShmSize(unittest.TestCase):
@@ -1482,36 +1488,26 @@ def test_non_gpu_task_gets_explicit_shm_size(self, mock_run):
 
 
 class TestJinjaTemplateDetection(unittest.TestCase):
-    """Verify that specs containing Jinja template markers are rejected before execution."""
-
-    def _write_temp_spec(self, content: str) -> str:
-        """Write YAML content to a temporary file and return its path."""
-        f = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
-        f.write(content)
-        f.flush()
-        f.close()
-        return f.name
+    """Verify that specs containing Jinja template markers are expanded locally before execution."""
 
-    def test_jinja_block_detected(self):
-        """A spec containing {% %} Jinja block tags is rejected."""
-        path = self._write_temp_spec(textwrap.dedent('''\
+    def test_jinja_block_expanded(self):
+        """A spec containing {% %} Jinja block tags is detected and expanded locally."""
+        spec_text = textwrap.dedent('''\
             workflow:
               name: {% if true %}test{% endif %}
               tasks:
               - name: task
                 image: alpine:3.18
                 command: ["echo"]
-        '''))
-        try:
-            with self.assertRaises(ValueError) as context:
-                run_workflow_standalone(path)
-            self.assertIn('Jinja', str(context.exception))
-        finally:
-            os.unlink(path)
+        ''')
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{%', expanded)
+        self.assertIn('name: test', expanded)
 
-    def test_jinja_comment_detected(self):
-        """A spec containing {# #} Jinja comment tags is rejected."""
-        path = self._write_temp_spec(textwrap.dedent('''\
+    def test_jinja_comment_expanded(self):
+        """A spec containing {# #} Jinja comment tags is detected and stripped locally."""
+        spec_text = textwrap.dedent('''\
             {# A comment #}
             workflow:
               name: test
@@ -1519,17 +1515,15 @@ def test_jinja_comment_detected(self):
               - name: task
                 image: alpine:3.18
                 command: ["echo"]
-        '''))
-        try:
-            with self.assertRaises(ValueError) as context:
-                run_workflow_standalone(path)
-            self.assertIn('Jinja', str(context.exception))
-        finally:
-            os.unlink(path)
+        ''')
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{#', expanded)
+        self.assertIn('name: test', expanded)
 
-    def test_default_values_section_detected(self):
-        """A spec containing a 'default-values' section is rejected as a Jinja template."""
-        path = self._write_temp_spec(textwrap.dedent('''\
+    def test_default_values_section_expanded(self):
+        """A spec containing a 'default-values' section has its variables expanded locally."""
+        spec_text = textwrap.dedent('''\
             workflow:
               name: "{{experiment_name}}"
               tasks:
@@ -1538,13 +1532,11 @@ def test_default_values_section_detected(self):
                 command: ["echo"]
             default-values:
               experiment_name: my-experiment
-        '''))
-        try:
-            with self.assertRaises(ValueError) as context:
-                run_workflow_standalone(path)
-            self.assertIn('Jinja', str(context.exception))
-        finally:
-            os.unlink(path)
+        ''')
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{{', expanded)
+        self.assertIn('my-experiment', expanded)
 
 
 # ============================================================================
@@ -1620,24 +1612,18 @@ def test_unsupported_spec_data_upload(self):
             self._run_cookbook_spec('data_upload.yaml')
         self.assertIn('object storage', str(context.exception).lower())
 
-    def test_unsupported_spec_dataset_upload(self):
-        """dataset_upload.yaml uses dataset outputs — verify it is cleanly rejected."""
-        with self.assertRaises(ValueError) as context:
-            self._run_cookbook_spec('dataset_upload.yaml')
-        self.assertIn('dataset', str(context.exception).lower())
-
-    def test_unsupported_spec_template(self):
-        """template_hello_world.yaml uses default-values templating — verify it is rejected."""
+    def test_template_spec_expanded_locally(self):
+        """template_hello_world.yaml uses default-values templating — verify it expands locally."""
         spec_path = os.path.join(self.COOKBOOK_DIR, 'template_hello_world.yaml')
         self.assertTrue(os.path.exists(spec_path),
                         f'Cookbook file not found: {spec_path}')
-        with self.assertRaises(ValueError) as context:
-            run_workflow_standalone(
-                spec_path=spec_path,
-                work_dir=self.work_dir,
-                keep_work_dir=True,
-            )
-        self.assertIn('Jinja', str(context.exception))
+        with open(spec_path, encoding='utf-8') as f:
+            spec_text = f.read()
+        self.assertTrue(_spec_has_templates(spec_text))
+        expanded = _expand_jinja_locally(spec_text)
+        self.assertNotIn('{{', expanded)
+        self.assertIn('hello-osmo', expanded)
+        self.assertIn('Hello from OSMO!', expanded)
 
 
 class TestRunWorkflowStandaloneErrors(unittest.TestCase):

From 3cce831b863a00c50e4f88247b3ebed031d66b45 Mon Sep 17 00:00:00 2001
From: Mauricio Franco <mfrancomarti@nvidia.com>
Date: Thu, 4 Jun 2026 11:59:08 -0700
Subject: [PATCH 35/35] Refactor test setup in TestComposeValidation to use
 temporary directories for credential handling

- Updated test_provided_credential_passes to create a temporary directory (used as work_dir) containing a secret-dir subdirectory for the credential path, improving isolation and preventing side effects during tests.
- Enhanced readability by replacing the hardcoded /tmp paths with a tempfile.TemporaryDirectory context manager for temporary directory creation.
---
 src/utils/tests/test_compose_executor.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/utils/tests/test_compose_executor.py b/src/utils/tests/test_compose_executor.py
index a2276b011..f70ea8129 100644
--- a/src/utils/tests/test_compose_executor.py
+++ b/src/utils/tests/test_compose_executor.py
@@ -473,11 +473,14 @@ def test_provided_credential_passes(self):
                 credentials:
                   my-secret: NGC_API_KEY
         ''')
-        executor = ComposeExecutor(
-            work_dir='/tmp/unused', credentials={'my-secret': '/tmp/secret-dir'})
-        spec = executor.load_spec(spec_text)
-        executor._build_dag(spec)
-        executor._validate_for_compose(spec)
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            secret_dir = os.path.join(tmp_dir, 'secret-dir')
+            os.makedirs(secret_dir)
+            executor = ComposeExecutor(
+                work_dir=tmp_dir, credentials={'my-secret': secret_dir})
+            spec = executor.load_spec(spec_text)
+            executor._build_dag(spec)
+            executor._validate_for_compose(spec)
 
     def test_simple_spec_passes(self):
         """A simple spec with only task-to-task inputs passes compose validation."""