diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/README.md b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/README.md
new file mode 100644
index 00000000..2f6d863c
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/README.md
@@ -0,0 +1,3 @@
+# Vector Addition 2^28 Throughput
+
+This challenge ports Frontier-CS `research/problems/vector_addition/2_28` into Agentics as a `coexecuted_benchmark` payload. Public validation runs a single tiny case (1,024 elements, one sample); the official configuration and data are supplied through the private `official-runs` overlay, which contains no secrets.
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.challenge.json b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.challenge.json
new file mode 100644
index 00000000..ce3c6ea8
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.challenge.json
@@ -0,0 +1,34 @@
+{
+  "schema_version": 1,
+  "request": "new_challenge",
+  "challenge_name": "vector-add-2-28-frontier-cs-vector-add-2-28",
+  "title": "Vector Addition 2^28 Throughput",
+  "summary": {
+    "en": "Optimize a Triton vector-addition kernel for 2^28 CUDA elements.",
+    "zh": "针对 2^28 个 CUDA 元素优化 Triton 向量加法内核。"
+  },
+  "keywords": [
+    "cuda",
+    "vector",
+    "triton"
+  ],
+  "readme_path": "README.md",
+  "bundle_path": "v1",
+  "private_assets": [
+    {
+      "asset_name": "official-runs",
+      "kind": "private_benchmark_data",
+      "required": true,
+      "required_paths": [
+        "private-benchmark/config.json",
+        "private-benchmark/submission_spec.json"
+      ],
+      "asset_note": "Private official data/config for Frontier-CS `research/problems/vector_addition/2_28`."
+    }
+  ],
+  "ci": {
+    "validate_manifest": true,
+    "validate_public_bundle": true,
+    "smoke_test_public_validation": false
+  }
+}
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/run.py b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/run.py
new file mode 100644
index 00000000..9ddfbf21
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/run.py
@@ -0,0 +1,410 @@
+from __future__ import annotations
+
+import argparse
+import contextlib
+import importlib.util
+import io
+import json
+import math
+import os
+import sys
+from pathlib import Path
+from typing import Any
+
+ENV_PROJECT_DIR = "evaluator-env"
+ENV_ACTIVE = "AGENTICS_EVALUATOR_ENV_ACTIVE"
+MAX_LOG_CHARS = 4000
+
+
+def parse_args() -> argparse.Namespace:  # Parse the evaluator wrapper's command-line flags.
+    parser = argparse.ArgumentParser(description="Agentics coexecuted evaluator wrapper")
+    parser.add_argument("--challenge-dir", required=True)
+    parser.add_argument("--workspace-dir", required=True)
+    parser.add_argument("--output-path", required=True)
+    parser.add_argument("--mode", choices=["validation", "official"], required=True)
+    parser.add_argument("--target", required=True)  # required, but not read elsewhere in this script
+    parser.add_argument("--setup-dir")  # optional; without it the venv re-exec step is skipped
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+    output_path = Path(args.output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    configure_runtime_cache(output_path.parent)
+    maybe_reexec_with_setup_python(args)
+
+    challenge_dir = Path(args.challenge_dir).resolve()
+    workspace_dir = Path(args.workspace_dir).resolve()
+    config = load_mode_config(challenge_dir, args.mode)
+    declared_metrics = declared_metric_names(challenge_dir)
+    logs: list[str] = []
+    try:
+        with captured_logs(logs):
+            result = dispatch(config, challenge_dir, workspace_dir, output_path.parent, args.mode)
+    except Exception as exc:  # noqa: BLE001 - result.json must explain evaluator failures.
+        result = {"status": "error", "score": 0.0, "score_unbounded": 0.0, "runs_successfully": 0.0, "error": str(exc)}
+    write_agentics_result(output_path, args.mode, result, logs, declared_metrics)
+    return 0
+
+
+def configure_runtime_cache(output_root: Path) -> None:  # Point HOME, temp and cache env vars under output_root.
+    tmp_root = output_root / "tmp"
+    tmp_root.mkdir(parents=True, exist_ok=True)
+    os.environ.setdefault("HOME", str(output_root))  # setdefault: values already present in the environment win
+    os.environ.setdefault("TMPDIR", str(tmp_root))
+    os.environ.setdefault("XDG_CACHE_HOME", str(tmp_root / "cache"))
+    os.environ.setdefault("TRITON_CACHE_DIR", str(tmp_root / "triton-cache"))
+    os.environ.setdefault("PYTHONDONTWRITEBYTECODE", "1")
+
+
+def maybe_reexec_with_setup_python(args: argparse.Namespace) -> None:  # Re-run this script under the setup venv's Python when it exists.
+    if os.environ.get(ENV_ACTIVE) == "1" or not args.setup_dir:
+        return  # already re-executed (marker set below), or no setup dir was given
+    venv_python = Path(args.setup_dir) / ENV_PROJECT_DIR / ".venv" / "bin" / "python"
+    if not venv_python.is_file():
+        return  # no venv interpreter found: keep running under the current Python
+    env = os.environ.copy()
+    env[ENV_ACTIVE] = "1"  # marker that stops the re-executed process from exec'ing again
+    os.execve(str(venv_python), [str(venv_python), *sys.argv], env)  # replaces the current process on success
+
+
+def load_mode_config(challenge_dir: Path, mode: str) -> dict[str, Any]:  # Load the JSON config for `mode`; raises RuntimeError if absent or not an object.
+    path = challenge_dir / ("public/config.json" if mode == "validation" else "private-benchmark/config.json")  # any non-validation mode reads the private config
+    if not path.is_file():
+        raise RuntimeError(f"missing {mode} config at {path}")
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    if not isinstance(payload, dict):
+        raise RuntimeError("mode config must be a JSON object")
+    return payload
+
+
+def declared_metric_names(challenge_dir: Path) -> set[str]:  # Collect metric names from spec.json -> metric_schema.metrics[*].name.
+    payload = json.loads((challenge_dir / "spec.json").read_text(encoding="utf-8"))  # propagates the error if spec.json is missing or invalid
+    metrics = payload.get("metric_schema", {}).get("metrics", [])
+    if not isinstance(metrics, list):
+        return set()
+    names: set[str] = set()
+    for metric in metrics:
+        if isinstance(metric, dict) and isinstance(metric.get("name"), str):  # malformed entries are skipped
+            names.add(metric["name"])
+    return names
+
+
+@contextlib.contextmanager
+def captured_logs(logs: list[str]):
+    stdout = io.StringIO()
+    stderr = io.StringIO()
+    with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
+        yield
+    text = (stdout.getvalue() + "\n" + stderr.getvalue()).strip()
+    if text:
+        logs.append(truncate(text))
+
+
+def dispatch(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path, output_dir: Path, mode: str) -> dict[str, Any]:  # Route to the runner named by config["runner"]; `mode` is not used here.
+    runner = config.get("runner")
+    if runner == "frontier_python_evaluate":
+        return run_frontier_python_evaluate(config, challenge_dir, workspace_dir, output_dir)
+    if runner == "sql_fuzzer":
+        return run_sql_fuzzer(config, challenge_dir, workspace_dir, output_dir)
+    if runner == "imagenet_pareto":
+        return run_imagenet(config, challenge_dir, workspace_dir)
+    if runner == "llm_router":
+        return run_llm_router(config, challenge_dir, workspace_dir)
+    if runner == "llm_sql":
+        return run_llm_sql(config, challenge_dir, workspace_dir, output_dir)
+    if runner == "symbolic_regression":
+        return run_symbolic(config, challenge_dir, workspace_dir)
+    if runner == "vdb_pareto":
+        return run_vdb(config, challenge_dir, workspace_dir)
+    if runner == "nbody":
+        return run_nbody(config, challenge_dir, workspace_dir)
+    raise RuntimeError(f"unsupported runner {runner!r}")  # also reached when the "runner" key is missing
+
+
+def import_module(path: Path, name: str) -> Any:  # Execute the file at `path` as a module registered under `name`.
+    spec = importlib.util.spec_from_file_location(name, path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"failed to import {path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[name] = module  # registered before execution; the entry stays in place if exec_module raises
+    spec.loader.exec_module(module)
+    return module
+
+
+def run_frontier_python_evaluate(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path, output_dir: Path) -> dict[str, Any]:
+    sys.path.insert(0, str(challenge_dir / "resources"))
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_source_evaluator")
+    apply_benchmark_override(source, config)
+    solution_path = workspace_dir / "solution.py"
+    spec_path = challenge_dir / str(config.get("submission_spec_path", "resources/submission_spec.json"))
+    cwd = Path.cwd()
+    output_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        os.chdir(output_dir)
+        return source.evaluate(solution_path, spec_path)
+    finally:
+        os.chdir(cwd)
+
+
+def apply_benchmark_override(source: Any, config: dict[str, Any]) -> None:
+    override = config.get("benchmark_override")
+    if not override:
+        return
+    if override == "vector_sizes":
+        sizes = [int(value) for value in config.get("sizes", [])]
+        if sizes:
+            source._determine_large_test_sizes = lambda: sizes
+        if "num_samples" in config:
+            source.NUM_VECTOR_SAMPLES = int(config["num_samples"])
+        if "gpu_warmups" in config:
+            source.GPU_WARMUP_ITERS = int(config["gpu_warmups"])
+        if "inner_warmups" in config:
+            source.INNER_ADD_WARMUP_ITERS = int(config["inner_warmups"])
+        return
+
+    import benchmark  # type: ignore
+
+    if override == "gemm_shapes":
+        shapes = [tuple(item) for item in config["shapes"]]
+        baseline = source.baseline_matmul
+        def run_benchmark(answer, baseline_matmul=baseline, print_output=False):
+            rows = [benchmark._bench_pair(int(m), int(n), int(k), answer, baseline_matmul) for m, n, k in shapes]
+            return summarize_rows(rows)
+        source.run_benchmark = run_benchmark
+        return
+
+    if override == "quant_dot_shapes":
+        shapes = [tuple(item) for item in config["shapes"]]
+        baseline = source.baseline_quant_dot
+        def run_benchmark(answer, baseline_fn=baseline, print_output=False):
+            rows = [benchmark._bench_pair(int(m), int(n), answer, baseline_fn) for m, n in shapes]
+            return summarize_rows(rows)
+        source.run_benchmark = run_benchmark
+        return
+
+    if override == "qknorm_shapes":
+        shapes = [tuple(item) for item in config["shapes"]]
+        baseline = source.baseline_qknorm
+        def run_benchmark(answer, baseline_fn=baseline, print_output=False):
+            rows = [benchmark._bench_pair(int(b), int(kv), int(qo), int(hd), answer, baseline_fn) for b, kv, qo, hd in shapes]
+            return summarize_rows(rows)
+        source.run_benchmark = run_benchmark
+        return
+
+    raise RuntimeError(f"unknown benchmark override {override}")
+
+
+def summarize_rows(rows: list[dict[str, Any]]) -> dict[str, Any]:  # Aggregate per-shape timing rows into speedup statistics.
+    speedups: list[float] = []
+    for row in rows:
+        answer = finite(row.get("answer_ms", 0.0))
+        baseline = finite(row.get("baseline_ms", row.get("gpu_baseline_ms", 0.0)))
+        if answer > 0 and baseline > 0:  # rows with a missing or non-positive timing contribute no speedup
+            speedups.append(baseline / answer)
+    if speedups:
+        arith = sum(speedups) / len(speedups)
+        geo = math.exp(sum(math.log(max(value, 1e-12)) for value in speedups) / len(speedups))
+        median = sorted(speedups)[len(speedups) // 2]  # upper-middle element for even counts, not an averaged median
+    else:
+        arith = geo = median = 0.0
+    return {
+        "rows": rows,
+        "arithmetic_mean_speedup": arith,
+        "geometric_mean_speedup": geo,
+        "median_speedup": median,
+        "pass_all": all(bool(row.get("close_passed")) for row in rows),  # NOTE(review): evaluates to True when rows is empty
+    }
+
+
+def run_sql_fuzzer(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path, output_dir: Path) -> dict[str, Any]:
+    sys.path.insert(0, str(challenge_dir / "resources"))
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_sql_fuzzer_evaluator")
+    solution_path = workspace_dir / "solution.py"
+    module = source.load_solution_module(solution_path)
+    solution = module.Solution()
+    artifact = solution.solve(str(challenge_dir / "resources"))
+    cwd = Path.cwd()
+    try:
+        os.chdir(output_dir)
+        artifact_path = source.materialize_artifact(artifact, solution_path)
+        fuzz = source.load_fuzzer_from_artifact(artifact_path)
+        result = source.evaluate_fuzzer(fuzz, challenge_dir / "resources", time_budget=float(config.get("time_budget_sec", 1.0)))
+    finally:
+        os.chdir(cwd)
+    return {"status": "success", "runs_successfully": 1.0, **result}
+
+
+def run_imagenet(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path) -> dict[str, Any]:
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_imagenet_evaluator")
+    for name, value in config.get("sample_overrides", {}).items():
+        if hasattr(source, name):
+            setattr(source, name, int(value))
+    module = source.load_solution_module(workspace_dir / "solution.py")
+    cls = getattr(module, "Solution")
+    evaluator = source.Evaluator()
+    return evaluator.evaluate(cls())
+
+
+def run_llm_router(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path) -> dict[str, Any]:
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_llm_router_evaluator")
+    evaluator = source.Evaluator(str(challenge_dir))
+    evaluator.trace_files = [str(challenge_dir / path) for path in config.get("datasets", [])]
+    return evaluator.evaluate(str(workspace_dir / "solution.py"))
+
+
+def run_llm_sql(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path, output_dir: Path) -> dict[str, Any]:
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_llm_sql_evaluator")
+    evaluator = source.Evaluator(str(challenge_dir))
+    evaluator.trace_files = [str(challenge_dir / path) for path in config.get("datasets", [])]
+    if "col_merges" in config:
+        evaluator.col_merges = config["col_merges"]
+    cache_dir = output_dir / "tmp"
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    evaluator.baseline_cache_file = str(cache_dir / "baseline_cache.json")
+    return evaluator.evaluate(str(workspace_dir / "solution.py"))
+
+
+def run_symbolic(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path) -> dict[str, Any]:
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_symbolic_evaluator")
+    refs = source.load_reference_metrics(challenge_dir / config["reference_path"])
+    data_dir = challenge_dir / config["data_dir"]
+    data_files = sorted(data_dir.glob("*.csv"))
+    datasets = {path.name: path for path in data_files if path.name in refs}
+    if not datasets:
+        raise RuntimeError("no symbolic regression datasets matched reference metrics")
+    module = source.load_solution_module(workspace_dir / "solution.py")
+    by_dataset = source.evaluate(module, datasets, refs)
+    scores = [float(entry["score"]) for entry in by_dataset.values()]
+    scores_unbounded = [float(entry["score_unbounded"]) for entry in by_dataset.values()]
+    mse_values = [float(entry["mse"]) for entry in by_dataset.values()]
+    return {
+        "status": "success",
+        "runs_successfully": 1.0,
+        "score": sum(scores) / len(scores),
+        "score_unbounded": sum(scores_unbounded) / len(scores_unbounded),
+        "metrics": {
+            "mean_mse": sum(mse_values) / len(mse_values),
+            "num_datasets": len(by_dataset),
+        },
+    }
+
+
+def run_vdb(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path) -> dict[str, Any]:
+    blocked = config.get("blocked_reason")
+    if blocked:
+        raise RuntimeError(str(blocked))
+    source = import_module(challenge_dir / "source-evaluator.py", "frontier_vdb_evaluator")
+    module = source.load_solution_module(workspace_dir / "solution.py")
+    index_class = source.find_solution_class(module)
+    if config.get("dataset") == "synthetic":
+        import numpy as np
+        rng = np.random.default_rng(int(config.get("seed", 2026)))
+        dim = int(config.get("dim", 16))
+        base = int(config.get("base_vectors", 128))
+        queries = int(config.get("queries", 16))
+        xb = rng.normal(size=(base, dim)).astype("float32")
+        xq = rng.normal(size=(queries, dim)).astype("float32")
+        distances = ((xq[:, None, :] - xb[None, :, :]) ** 2).sum(axis=2)
+        gt = np.argsort(distances, axis=1)[:, :1].astype("int64")
+        index = index_class(dim)
+        index.add(xb)
+        metrics = source.evaluate_index(index, xq, gt, int(config.get("k", 1)))
+        score = source.compute_score(metrics)
+        unbounded_cfg = dict(source.SCORE_CONFIG)
+        unbounded_cfg["scoring"] = dict(unbounded_cfg["scoring"])
+        unbounded_cfg["scoring"]["max_score"] = float("inf")
+        unbounded_cfg["scoring"]["min_score"] = float("-inf")
+        score_unbounded = source.compute_score(metrics, unbounded_cfg)
+        return {"status": "success", "runs_successfully": 1.0, "score": score, "score_unbounded": score_unbounded, "metrics": metrics}
+    return source.evaluate(workspace_dir / "solution.py", k=int(config.get("k", 1)))
+
+
+def run_nbody(config: dict[str, Any], challenge_dir: Path, workspace_dir: Path) -> dict[str, Any]:
+    common = import_module(challenge_dir / "nbody-common/evaluator_common.py", "frontier_nbody_common")
+    cfg = common.VariantConfig(
+        num_particles=int(config["num_particles"]),
+        num_iterations=int(config["num_iterations"]),
+        space_size=float(config["space_size"]),
+        num_runs=int(config["num_runs"]),
+        min_speedup=float(config["min_speedup"]),
+        max_speedup=float(config["max_speedup"]),
+    )
+    return common.evaluate(workspace_dir / "solution.cpp", challenge_dir / "nbody-common", cfg)
+
+
+def write_agentics_result(output_path: Path, mode: str, result: dict[str, Any], logs: list[str], declared_metrics: set[str]) -> None:  # Convert a runner result dict into the result JSON written at output_path.
+    score = finite(result.get("score", 0.0))
+    score_unbounded = finite(result.get("score_unbounded", score))
+    error = result.get("error")
+    pass_all = result.get("pass_all")
+    runs_successfully = finite(result.get("runs_successfully", 1.0))
+    correct = bool(result.get("correct", True))
+    passed = error is None and runs_successfully > 0 and correct and (pass_all is not False)  # keys absent from result count as passing
+    metrics = collect_metrics(result, score, score_unbounded, passed, declared_metrics)
+    summary_key = "validation_summary" if mode == "validation" else "official_summary"
+    payload: dict[str, Any] = {
+        "status": "passed" if passed else "failed",
+        "mode": mode,
+        "rank_score": score,
+        "aggregate_metrics": metrics,
+        summary_key: {"score": score, "passed": 1 if passed else 0, "total": 1},
+        "logs": [truncate(item) for item in logs[:4]],  # at most four captured log entries are kept
+    }
+    if error is not None:
+        payload["logs"].append(truncate(str(error)))
+    if mode == "validation":  # only validation runs expose a per-case public result
+        payload["public_results"] = [{"case_name": "public-validation", "status": payload["status"], "score": score, "message": truncate(str(error or "ok"), 500)}]
+    output_path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
+
+
+def collect_metrics(result: dict[str, Any], score: float, score_unbounded: float, passed: bool, declared_metrics: set[str]) -> list[dict[str, float | str]]:  # Build the metric list, restricted to names present in declared_metrics.
+    values: dict[str, float] = {}
+
+    def set_declared(name: str, value: float) -> None:
+        if name in declared_metrics:
+            values[name] = value
+
+    set_declared("score", score)
+    set_declared("score_unbounded", score_unbounded)
+    set_declared("runs_successfully", finite(result.get("runs_successfully", 1.0)))
+    set_declared("correctness", 1.0 if passed else 0.0)
+
+    def add(name: str, value: Any) -> None:
+        if name in {"score", "score_unbounded", "runs_successfully", "correctness"} or name not in declared_metrics:
+            return  # the four core metrics set above are never overwritten by result entries
+        if isinstance(value, bool):
+            values[name] = 1.0 if value else 0.0
+        elif isinstance(value, (int, float)) and math.isfinite(float(value)):
+            values[name] = float(value)  # non-numeric and non-finite values fall through and are dropped
+    for key, value in result.items():
+        if key in {"metrics", "by_dataset", "stdout", "stderr"}:
+            continue
+        add(key, value)
+    nested = result.get("metrics")
+    if isinstance(nested, dict):
+        for key, value in nested.items():  # processed after top-level keys, so a nested value replaces a same-named one
+            if isinstance(value, dict):
+                continue
+            add(key, value)
+    return [{"metric_name": key, "value": value} for key, value in values.items()]
+
+
+def finite(value: Any) -> float:  # Coerce `value` to a finite float, falling back to 0.0.
+    try:
+        number = float(value)
+    except Exception:  # any conversion failure (e.g. None or a non-numeric string) maps to 0.0
+        return 0.0
+    return number if math.isfinite(number) else 0.0  # NaN and +/-inf also map to 0.0
+
+
+def truncate(value: str, limit: int = MAX_LOG_CHARS) -> str:  # Strip NUL characters and cap the text at `limit` characters.
+    value = value.replace("\x00", "")
+    if len(value) <= limit:
+        return value
+    return value[:limit] + "... [truncated]"  # the marker is appended after the cut, so the result exceeds `limit`
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/setup.py b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/setup.py
new file mode 100644
index 00000000..ff34c650
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/coexecuted-evaluator/setup.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+import argparse, json, os, shutil, subprocess
+from pathlib import Path
+ENV_PROJECT_DIR = "evaluator-env"
+PYTHON_INSTALL_DIR = "uv-python"
+PYTHON_REQUEST = "3.12"
+PYPROJECT = '[project]\nname = "vector_add_2_28_frontier_cs_vector_add_2_28"\nversion = "0.1.0"\nrequires-python = ">=3.12,<3.13"\ndependencies = [\n  "torch>=2.11.0,<2.12.0",\n  "triton>=3.5.0,<4",\n  "numpy>=1.26",\n  "tqdm>=4.64",\n]\n\n[tool.uv]\npackage = false\n\n[tool.uv.sources]\ntorch = [\n  { index = "pytorch-cu130", marker = "sys_platform == \'linux\'" },\n]\n\n[[tool.uv.index]]\nname = "pytorch-cu130"\nurl = "https://download.pytorch.org/whl/cu130"\nexplicit = true\n'
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Set up evaluator env")
+    parser.add_argument("--challenge-dir", required=True)
+    parser.add_argument("--setup-dir", required=True)
+    parser.add_argument("--mode", choices=["validation", "official"], required=True)
+    parser.add_argument("--target", required=True)
+    args = parser.parse_args()
+    setup_dir = Path(args.setup_dir)
+    project_dir = setup_dir / ENV_PROJECT_DIR
+    project_dir.mkdir(parents=True, exist_ok=True)
+    (project_dir / "pyproject.toml").write_text(PYPROJECT, encoding="utf-8")
+    env = os.environ.copy()
+    env["UV_CACHE_DIR"] = str(setup_dir / "uv-cache")
+    env["UV_LINK_MODE"] = "copy"
+    env["UV_PROJECT_ENVIRONMENT"] = str(project_dir / ".venv")
+    env["UV_PYTHON_INSTALL_DIR"] = str(setup_dir / PYTHON_INSTALL_DIR)
+    subprocess.run(["uv", "python", "install", PYTHON_REQUEST], check=True, env=env, timeout=180)
+    managed = find_managed_python(env)
+    subprocess.run(["uv", "sync", "--project", str(project_dir), "--python", str(managed), "--no-dev", "--no-install-project"], check=True, env=env, timeout=1200)
+    (project_dir / "agentics-env.json").write_text(json.dumps({"mode": args.mode, "target": args.target}, indent=2), encoding="utf-8")
+    shutil.rmtree(setup_dir / "uv-cache", ignore_errors=True)
+    return 0
+
+def find_managed_python(env: dict[str, str]) -> Path:  # Ask uv for the managed interpreter matching PYTHON_REQUEST.
+    result = subprocess.run(["uv", "python", "find", PYTHON_REQUEST, "--managed-python", "--resolve-links"], check=True, capture_output=True, text=True, env=env, timeout=60)
+    path = Path(result.stdout.strip())
+    if not path.is_file():  # guards against empty or unexpected uv output
+        raise RuntimeError(f"managed Python not found at {path}")
+    return path
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/README.md b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/README.md
new file mode 100644
index 00000000..516fd868
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/README.md
@@ -0,0 +1,3 @@
+# Public Validation
+
+Tiny deterministic validation config for `research/problems/vector_addition/2_28`.
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/config.json b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/config.json
new file mode 100644
index 00000000..6b0ed234
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/config.json
@@ -0,0 +1,11 @@
+{
+  "runner": "frontier_python_evaluate",
+  "submission_spec_path": "public/submission_spec.json",
+  "benchmark_override": "vector_sizes",
+  "sizes": [
+    1024
+  ],
+  "num_samples": 1,
+  "gpu_warmups": 1,
+  "inner_warmups": 1
+}
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/submission_spec.json b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/submission_spec.json
new file mode 100644
index 00000000..40b994da
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/public/submission_spec.json
@@ -0,0 +1,15 @@
+{
+  "problem_name": "vector_addition",
+  "description": "Triton kernel optimization problem for high-performance vector addition",
+  "requirements": {
+    "cuda_backend": true,
+    "gpu_required": true,
+    "triton_version": ">=2.1.0",
+    "torch_version": ">=2.0.0"
+  },
+  "evaluation": {
+    "timeout_seconds": 300,
+    "memory_limit_mb": 8192,
+    "gpu_memory_limit_mb": 4096
+  }
+}
\ No newline at end of file
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/pyproject.toml b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/pyproject.toml
new file mode 100644
index 00000000..db74b8e4
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "vector-addition"
+version = "0.1.0"
+description = "Vector addition problem resources"
+requires-python = ">=3.8"
+dependencies = []
+# Docker image already has torch, triton, numpy, tqdm
+
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/submission_spec.json b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/submission_spec.json
new file mode 100644
index 00000000..40b994da
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/submission_spec.json
@@ -0,0 +1,15 @@
+{
+  "problem_name": "vector_addition",
+  "description": "Triton kernel optimization problem for high-performance vector addition",
+  "requirements": {
+    "cuda_backend": true,
+    "gpu_required": true,
+    "triton_version": ">=2.1.0",
+    "torch_version": ">=2.0.0"
+  },
+  "evaluation": {
+    "timeout_seconds": 300,
+    "memory_limit_mb": 8192,
+    "gpu_memory_limit_mb": 4096
+  }
+}
\ No newline at end of file
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/vector-add.py b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/vector-add.py
new file mode 100644
index 00000000..90552a4c
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/resources/vector-add.py
@@ -0,0 +1,139 @@
+"""
+Vector Addition
+===============
+
+In this tutorial, you will write a simple vector addition using Triton.
+
+In doing so, you will learn about:
+
+* The basic programming model of Triton.
+
+* The `triton.jit` decorator, which is used to define Triton kernels.
+
+* The best practices for validating and benchmarking your custom ops against native reference implementations.
+
+"""
+
+# %%
+# Compute Kernel
+# --------------
+
+import torch
+
+import triton
+import triton.language as tl
+
+# Ensure CUDA is available and properly initialize device
+if not torch.cuda.is_available():
+    raise RuntimeError("CUDA is not available. This benchmark requires a CUDA-enabled GPU.")
+DEVICE = torch.device("cuda:0")
+torch.cuda.set_device(DEVICE)
+
+
+@triton.jit
+def add_kernel(x_ptr,  # *Pointer* to first input vector.
+               y_ptr,  # *Pointer* to second input vector.
+               output_ptr,  # *Pointer* to output vector.
+               n_elements,  # Size of the vector.
+               BLOCK_SIZE: tl.constexpr,  # Number of elements each program should process.
+               # NOTE: `constexpr` so it can be used as a shape value.
+               ):
+    # There are multiple 'programs' processing different data. We identify which program
+    # we are here:
+    pid = tl.program_id(axis=0)  # We use a 1D launch grid so axis is 0.
+    # This program will process inputs that are offset from the initial data.
+    # For instance, if you had a vector of length 256 and block_size of 64, the programs
+    # would each access the elements [0:64, 64:128, 128:192, 192:256].
+    # Note that offsets is a block of element indices; pointers are formed below by adding it to x_ptr / y_ptr / output_ptr:
+    block_start = pid * BLOCK_SIZE
+    offsets = block_start + tl.arange(0, BLOCK_SIZE)
+    # Create a mask to guard memory operations against out-of-bounds accesses.
+    mask = offsets < n_elements
+    # Load x and y from DRAM, masking out any extra elements in case the input is not a
+    # multiple of the block size.
+    x = tl.load(x_ptr + offsets, mask=mask)
+    y = tl.load(y_ptr + offsets, mask=mask)
+    output = x + y
+    # Write x + y back to DRAM.
+    tl.store(output_ptr + offsets, output, mask=mask)
+
+
+# %%
+# Let's also declare a helper function to (1) allocate the `z` tensor
+# and (2) enqueue the above kernel with appropriate grid/block sizes:
+
+
+def add(x: torch.Tensor, y: torch.Tensor):
+    # We need to preallocate the output.
+    output = torch.empty_like(x)
+    assert x.device == DEVICE and y.device == DEVICE and output.device == DEVICE
+    n_elements = output.numel()
+    # The SPMD launch grid denotes the number of kernel instances that run in parallel.
+    # It is analogous to CUDA launch grids. It can be either Tuple[int], or Callable(metaparameters) -> Tuple[int].
+    # In this case, we use a 1D grid where the size is the number of blocks:
+    grid = lambda meta: (triton.cdiv(n_elements, meta['BLOCK_SIZE']), )
+    # NOTE:
+    #  - Each torch.tensor object is implicitly converted into a pointer to its first element.
+    #  - `triton.jit`'ed functions can be indexed with a launch grid to obtain a callable GPU kernel.
+    #  - Don't forget to pass meta-parameters as keywords arguments.
+    add_kernel[grid](x, y, output, n_elements, BLOCK_SIZE=1024)
+    # We return a handle to z but, since `torch.cuda.synchronize()` hasn't been called, the kernel is still
+    # running asynchronously at this point.
+    return output
+
+
+# %%
+# We can now use the above function to compute the element-wise sum of two `torch.tensor` objects and test its correctness:
+
+torch.manual_seed(0)
+size = 98432
+x = torch.rand(size, device=DEVICE)
+y = torch.rand(size, device=DEVICE)
+output_torch = x + y
+output_triton = add(x, y)
+print(output_torch)
+print(output_triton)
+print(f'The maximum difference between torch and triton is '
+      f'{torch.max(torch.abs(output_torch - output_triton))}')
+
+# %%
+# Seems like we're good to go!
+
+# %%
+# Benchmark
+# ---------
+#
+# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
+# To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
+# for different problem sizes.
+
+
+@triton.testing.perf_report(
+    triton.testing.Benchmark(
+        x_names=['size'],  # Argument names to use as an x-axis for the plot.
+        x_vals=[2**i for i in range(12, 28, 1)],  # Different possible values for `x_name`.
+        x_log=True,  # x axis is logarithmic.
+        line_arg='provider',  # Argument name whose value corresponds to a different line in the plot.
+        line_vals=['triton', 'torch'],  # Possible values for `line_arg`.
+        line_names=['Triton', 'Torch'],  # Label name for the lines.
+        styles=[('blue', '-'), ('green', '-')],  # Line styles.
+        ylabel='GB/s',  # Label name for the y-axis.
+        plot_name='vector-add-performance',  # Name for the plot. Used also as a file name for saving the plot.
+        args={},  # Values for function arguments not in `x_names` and `y_name`.
+    ))
+def benchmark(size, provider):
+    x = torch.rand(size, device=DEVICE, dtype=torch.float32)
+    y = torch.rand(size, device=DEVICE, dtype=torch.float32)
+    quantiles = [0.5, 0.2, 0.8]
+    if provider == 'torch':
+        ms, min_ms, max_ms = triton.testing.do_bench(lambda: x + y, quantiles=quantiles)
+    if provider == 'triton':
+        ms, min_ms, max_ms = triton.testing.do_bench(lambda: add(x, y), quantiles=quantiles)
+    gbps = lambda ms: 3 * x.numel() * x.element_size() * 1e-9 / (ms * 1e-3)
+    return gbps(ms), gbps(max_ms), gbps(min_ms)
+
+
+# %%
+# We can now run the decorated function above. Pass `print_data=True` to see the performance number, `show_plots=True` to plot them, and/or
+# `save_path='/path/to/results/'` to save them to disk along with raw CSV data:
+benchmark.run(print_data=True, show_plots=False)
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/source-evaluator.py b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/source-evaluator.py
new file mode 100644
index 00000000..ce9b10b0
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/source-evaluator.py
@@ -0,0 +1,415 @@
+#!/usr/bin/env python3
+import argparse
+import importlib.util
+import json
+import math
+import os
+import sys
+from pathlib import Path
+from types import ModuleType
+from typing import Any, Dict, List, Tuple
+
+# Add resources to path for imports
+HERE = Path(__file__).resolve().parent
+RESOURCES_DIR = HERE / "resources"
+sys.path.insert(0, str(RESOURCES_DIR))
+
+import torch
+import triton
+import numpy as np
+
+DEFAULT_SPEC = HERE / "resources" / "submission_spec.json"
+ARTIFACT_PATH = Path("./output_ans").resolve()
+
+DEVICE = triton.runtime.driver.active.get_active_torch_device()
+
+
+def _determine_large_test_sizes() -> List[int]:
+    """Return the single benchmarked vector length: 2^28 (268,435,456) elements."""
+    return [1 << 28]
+
+
+DEFAULT_SEED = 1337
+NUM_VECTOR_SAMPLES = 5
+GPU_WARMUP_ITERS = 10
+INNER_ADD_WARMUP_ITERS = 5
+
+
+def warmup_gpu(iters: int = GPU_WARMUP_ITERS) -> None:
+    """Run max(1, iters) small float32 adds on DEVICE between two CUDA syncs; no-op without CUDA."""
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+        length = 1 << 20
+        lhs = torch.rand(length, device=DEVICE, dtype=torch.float32)
+        rhs = torch.rand(length, device=DEVICE, dtype=torch.float32)
+        repeats = max(1, int(iters))
+        for _ in range(repeats):
+            _warm = lhs + rhs
+        torch.cuda.synchronize()
+
+
+def load_solution_module(solution_path: Path) -> ModuleType:
+    """Import the file at ``solution_path`` as module ``submitted_solution`` and return it."""
+    if not solution_path.exists():
+        raise FileNotFoundError(f"solution.py not found at {solution_path}")
+    module_spec = importlib.util.spec_from_file_location("submitted_solution", solution_path)
+    if module_spec is None or module_spec.loader is None:
+        raise ImportError(f"Failed to load spec for {solution_path}")
+    loaded = importlib.util.module_from_spec(module_spec)
+    sys.modules[module_spec.name] = loaded  # registered before exec so the module can import itself
+    module_spec.loader.exec_module(loaded)
+    return loaded
+
+
+def materialize_artifact(result: Any, solution_path: Path) -> Path:
+    """Write the value returned by ``Solution.solve()`` to ARTIFACT_PATH as JSON.
+
+    A dict is dumped as-is; a string naming an existing file becomes
+    ``{"program_path": ...}``; any other string is stored as ``{"code": ...}``
+    so that ``load_add_from_artifact`` can json.load it. Raises TypeError for
+    any other type. ``solution_path`` is currently unused.
+    """
+    if isinstance(result, dict):
+        payload = result
+    elif isinstance(result, str):
+        payload = {"code": result}
+        # Probe the filesystem only for short single-line strings; is_file() can fail on long names.
+        if len(result) < 4096 and '\n' not in result:
+            candidate = Path(result)
+            try:
+                if candidate.is_file():
+                    payload = {"program_path": str(candidate.resolve())}
+            except OSError:
+                pass  # not a usable path: keep treating the string as code
+    else:
+        raise TypeError(
+            "Solution.solve() must return a dict/path-string/code-string; got "
+            f"{type(result)!r}."
+        )
+    ARTIFACT_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with ARTIFACT_PATH.open("w", encoding="utf-8") as fout:
+        json.dump(payload, fout)
+    return ARTIFACT_PATH
+
+
+def load_add_from_artifact(artifact_path: Path) -> Any:
+    """Load the submitted ``add`` callable described by the JSON artifact.
+
+    The artifact must be a JSON object with one of two keys:
+    - ``"code"``: Python source, written to a temporary ``.py`` file and imported.
+    - ``"program_path"``: path of an existing Python file to import.
+
+    Returns the module-level ``add`` attribute of the imported module.
+    Raises FileNotFoundError if ``program_path`` does not exist, ImportError if
+    no import spec can be built, and ValueError if neither key is present or the
+    module does not define ``add``.
+    """
+    with artifact_path.open("r", encoding="utf-8") as fin:
+        artifact = json.load(fin)
+
+    if "code" in artifact:
+        import tempfile
+
+        # Import from a real file rather than exec() to avoid Triton source
+        # inspection issues. The file is deliberately not deleted: the JIT
+        # needs the source file at compile time.
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".py", delete=False, encoding="utf-8"
+        ) as f:
+            f.write(artifact["code"])
+        program_path = Path(f.name)
+        module_name = "temp_add_module"
+        missing_add = "Code must define an 'add' function"
+    elif "program_path" in artifact:
+        program_path = Path(artifact["program_path"])
+        if not program_path.exists():
+            raise FileNotFoundError(f"Program file not found: {program_path}")
+        module_name = "submitted_program"
+        missing_add = "Program must define an 'add' function"
+    else:
+        raise ValueError("Artifact must contain either 'code' or 'program_path'")
+
+    # Both artifact kinds are now a file on disk; import it as a module.
+    spec = importlib.util.spec_from_file_location(module_name, program_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Failed to load spec for {program_path}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+
+    if not hasattr(module, "add"):
+        raise ValueError(missing_add)
+    return module.add
+
+
+def benchmark_add(add_func: Any, sizes: List[int], seed: int = DEFAULT_SEED, num_samples: int = NUM_VECTOR_SAMPLES) -> List[Dict[str, Any]]:
+    """Time add_func, the PyTorch GPU add and a CPU add for each size; return one result dict per size."""
+    results = []
+    # Timings are do_bench medians; each size reports the median over num_samples fresh vector pairs.
+    # Warm up the GPU for more stable timings
+    warmup_gpu(GPU_WARMUP_ITERS)
+    
+    for size in sizes:
+        torch.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(seed)
+        np.random.seed(seed)
+        # Reseeding at the start of every size makes the generated vectors repeat across runs.
+        pytorch_ms_list = []
+        cpu_ms_list = []
+        custom_ms_list = []
+        correctness_list = []
+        
+        for sample_idx in range(max(1, int(num_samples))):
+            # Create test vectors deterministically
+            x = torch.rand(size, device=DEVICE, dtype=torch.float32)
+            y = torch.rand(size, device=DEVICE, dtype=torch.float32)
+            # CPU baseline vectors
+            x_cpu = x.detach().cpu()
+            y_cpu = y.detach().cpu()
+            
+            # PyTorch baseline (GPU)
+            def pytorch_add():
+                return x + y
+            # Inner warmup additions before timing
+            if torch.cuda.is_available():
+                for _ in range(INNER_ADD_WARMUP_ITERS):
+                    _ = pytorch_add()
+                torch.cuda.synchronize()
+            pytorch_ms = triton.testing.do_bench(pytorch_add, quantiles=[0.5])
+            if isinstance(pytorch_ms, (tuple, list)):  # do_bench may return a sequence when quantiles are passed
+                pytorch_ms = pytorch_ms[0]
+            pytorch_ms_list.append(float(pytorch_ms))
+            # NOTE(review): do_bench is also used to time the CPU add below - confirm it measures host-side time as intended.
+            # Naive CPU baseline
+            def cpu_add():
+                return x_cpu + y_cpu
+            cpu_ms = triton.testing.do_bench(cpu_add, quantiles=[0.5])
+            if isinstance(cpu_ms, (tuple, list)):
+                cpu_ms = cpu_ms[0]
+            cpu_ms_list.append(float(cpu_ms))
+            
+            # Custom implementation (GPU)
+            def custom_add():
+                return add_func(x, y)
+            # Inner warmup additions before timing
+            if torch.cuda.is_available():
+                for _ in range(INNER_ADD_WARMUP_ITERS):
+                    _ = custom_add()
+                torch.cuda.synchronize()
+            custom_ms = triton.testing.do_bench(custom_add, quantiles=[0.5])
+            if isinstance(custom_ms, (tuple, list)):
+                custom_ms = custom_ms[0]
+            custom_ms_list.append(float(custom_ms))
+            
+            # Correctness test on this sample
+            pytorch_result = pytorch_add()
+            custom_result = custom_add()
+            is_correct = torch.allclose(pytorch_result, custom_result, rtol=1e-5, atol=1e-8)
+            correctness_list.append(bool(is_correct))
+        
+        # Aggregate timings as medians for stability
+        def median(lst):
+            s = sorted(lst)
+            mid = len(s) // 2
+            if len(s) % 2 == 1:
+                return s[mid]
+            return 0.5 * (s[mid - 1] + s[mid])
+        
+        pytorch_ms = median(pytorch_ms_list)
+        cpu_ms = median(cpu_ms_list)
+        custom_ms = median(custom_ms_list)
+        
+        # Bandwidths (GB/s): 3 vectors (x, y, result) of `size` 4-byte floats per call, over the median time in ms
+        pytorch_bandwidth = 3 * size * 4 * 1e-9 / (pytorch_ms * 1e-3)
+        cpu_bandwidth = 3 * size * 4 * 1e-9 / (cpu_ms * 1e-3)
+        custom_bandwidth = 3 * size * 4 * 1e-9 / (custom_ms * 1e-3)
+        # A size counts as correct only if every sample matched the PyTorch result within tolerance.
+        is_correct = all(correctness_list)
+        
+        results.append({
+            "size": size,
+            "pytorch_ms": pytorch_ms,
+            "cpu_ms": cpu_ms,
+            "custom_ms": custom_ms,
+            "pytorch_bandwidth": pytorch_bandwidth,
+            "cpu_bandwidth": cpu_bandwidth,
+            "custom_bandwidth": custom_bandwidth,
+            "speedup": pytorch_ms / custom_ms if custom_ms > 0 else 0.0,
+            "bandwidth_ratio": custom_bandwidth / cpu_bandwidth if cpu_bandwidth > 0 else 0.0,
+            "is_correct": is_correct,
+        })
+    
+    return results
+
+
+def evaluate_vector_addition(add_func: Any) -> Dict[str, Any]:
+    """Benchmark ``add_func`` and convert the measurements into a 0-100 score.
+
+    The score is anchored on bandwidth ratios against the CPU baseline:
+    0 points at custom/cpu = 1x and 100 points at custom/cpu = 2 * pytorch/cpu.
+    ``score`` is clamped to [0, 100]; ``score_unbounded`` is only floored at 0.
+
+    Returns a dict that always contains ``score`` and ``pass_all``. On success
+    it also holds the aggregate ratios, the test counts and the per-size
+    ``results``; on a correctness failure or an exception it holds ``error``
+    and a score of 0. Exceptions are reported in the dict, never raised.
+    """
+    try:
+        # The benchmarked sizes are fixed by _determine_large_test_sizes().
+        sizes = _determine_large_test_sizes()
+        results = benchmark_add(add_func, sizes)
+        passed_tests = sum(1 for r in results if r.get("is_correct"))
+
+        # Enforce strict correctness: if any test fails, score 0
+        if not results or passed_tests != len(results):
+            return {
+                "error": "Correctness not 100% across all samples/sizes",
+                "score": 0,
+                "pass_all": False,
+                "total_tests": len(results),
+                "passed_tests": passed_tests,
+                "results": results,
+            }
+
+        def geometric_mean(values: List[float]) -> float:
+            return math.exp(sum(math.log(v) for v in values) / len(values))
+
+        # Every result is correct from here on, so no per-result filtering is
+        # needed and the ratio lists below cannot be empty.
+        bandwidth_ratios = [r["bandwidth_ratio"] for r in results]
+        # The 1e-12 floors keep the ratios strictly positive for math.log().
+        pytorch_vs_cpu = [
+            max(r["pytorch_bandwidth"] / max(r["cpu_bandwidth"], 1e-12), 1e-12)
+            for r in results
+        ]
+        custom_vs_cpu = [
+            max(r["custom_bandwidth"] / max(r["cpu_bandwidth"], 1e-12), 1e-12)
+            for r in results
+        ]
+        gm_pytorch_vs_cpu = geometric_mean(pytorch_vs_cpu)
+        gm_custom_vs_cpu = geometric_mean(custom_vs_cpu)
+
+        # Calculate score (0-100 scale)
+        # Anchor 0 at CPU naive baseline (custom/cpu = 1x)
+        # Anchor 100 at 2x PyTorch GPU baseline (custom/cpu = 2 * pytorch/cpu)
+        target = max(2.0 * gm_pytorch_vs_cpu, 1.0 + 1e-12)
+        numerator = max(0.0, gm_custom_vs_cpu - 1.0)
+        denominator = max(target - 1.0, 1e-12)
+        score_unbounded = numerator / denominator * 100.0
+        score = max(0.0, min(100.0, score_unbounded))
+
+        return {
+            "geometric_mean_bandwidth_ratio": geometric_mean(bandwidth_ratios),
+            "arithmetic_mean_bandwidth_ratio": sum(bandwidth_ratios) / len(bandwidth_ratios),
+            "geometric_mean_custom_vs_cpu": gm_custom_vs_cpu,
+            "geometric_mean_pytorch_vs_cpu": gm_pytorch_vs_cpu,
+            "score": score,
+            "score_unbounded": score_unbounded,
+            "pass_all": True,
+            "total_tests": len(results),
+            "passed_tests": passed_tests,
+            "results": results,
+        }
+    except Exception as e:
+        return {
+            "error": str(e),
+            "score": 0,
+            "pass_all": False,
+        }
+
+
+def evaluate(solution_path: Path, spec_path: Path) -> dict:
+    """Run the whole pipeline for one submission and return its result dict.
+
+    Steps: import the solution file, instantiate ``Solution``, call
+    ``solve(spec_path)``, persist the returned artifact, load ``add`` from it
+    and score it. Any exception along the way is folded into a
+    ``{"status": "error", "error": ..., "score": 0}`` dict instead of raised.
+    """
+    try:
+        # Import the submission and check the interface it must expose.
+        submitted = load_solution_module(solution_path)
+        if not hasattr(submitted, "Solution"):
+            raise ValueError("Solution module must define a 'Solution' class")
+
+        # The class is instantiated without arguments.
+        solver = submitted.Solution()
+        if not hasattr(solver, "solve"):
+            raise ValueError("Solution class must have a 'solve' method")
+
+        # solve() output -> artifact on disk -> importable add() callable
+        artifact_path = materialize_artifact(solver.solve(spec_path), solution_path)
+        add_func = load_add_from_artifact(artifact_path)
+
+        # evaluate_vector_addition reports its own failures inside the dict,
+        # so "status" stays "success" whenever the pipeline itself ran.
+        outcome = {
+            "status": "success",
+            "artifact_path": str(artifact_path),
+        }
+        outcome.update(evaluate_vector_addition(add_func))
+        return outcome
+    except Exception as e:
+        # Any failure (import, solve, artifact, load) becomes a zero-score result.
+        failure = {
+            "status": "error",
+            "error": str(e),
+            "score": 0,
+        }
+        return failure
+
+
+def main():
+    """Command-line entry point: evaluate one solution and report its score.
+
+    Parses --solution-path, --spec-path and --output-path, runs ``evaluate``,
+    writes the result dict to the output path as JSON and prints a summary.
+    The last line printed is the score line ("score score_unbounded" on
+    success, "0" on failure); a failed evaluation exits with status 1.
+    """
+    parser = argparse.ArgumentParser(description="Evaluate vector addition solutions")
+    parser.add_argument(
+        "--solution-path", type=Path, default=Path("./solution.py"),
+        help="Path to solution.py file",
+    )
+    parser.add_argument(
+        "--spec-path", type=Path, default=DEFAULT_SPEC,
+        help="Path to specification file",
+    )
+    parser.add_argument(
+        "--output-path", type=Path, default=Path("./result.json"),
+        help="Path to output result file",
+    )
+    args = parser.parse_args()
+
+    # Evaluate, then persist the full result before printing anything.
+    result = evaluate(args.solution_path, args.spec_path)
+    with args.output_path.open("w", encoding="utf-8") as fout:
+        json.dump(result, fout, indent=2)
+
+    # Failure path first: report, emit the zero score line and stop.
+    if result["status"] != "success":
+        print(f"Evaluation failed: {result.get('error', 'Unknown error')}")
+        # Print error score as last line
+        print("0")
+        sys.exit(1)
+
+    # Success path: human-readable summary, optional fields only when present.
+    print("Evaluation completed successfully!")
+    print(f"Score: {result.get('score', 0):.2f}/100")
+    if 'geometric_mean_bandwidth_ratio' in result:
+        print(f"Geometric mean bandwidth ratio: {result['geometric_mean_bandwidth_ratio']:.3f}x")
+    if 'passed_tests' in result and 'total_tests' in result:
+        print(f"Tests passed: {result['passed_tests']}/{result['total_tests']}")
+    if 'error' in result:
+        print(f"Error: {result['error']}")
+
+    # Print score as last line for main_loop.sh to extract
+    # Format: "score score_unbounded" (space-separated)
+    score = result.get('score', 0)
+    score_unbounded = result.get('score_unbounded', score)
+    print(f"{score} {score_unbounded}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/spec.json b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/spec.json
new file mode 100644
index 00000000..ea0919b2
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/spec.json
@@ -0,0 +1,166 @@
+{
+  "schema_version": 1,
+  "challenge_name": "vector-add-2-28-frontier-cs-vector-add-2-28",
+  "challenge_title": "Vector Addition 2^28 Throughput",
+  "summary": {
+    "en": "Optimize a Triton vector-addition kernel for 2^28 CUDA elements.",
+    "zh": "Optimize a Triton vector-addition kernel for 2^28 CUDA elements."
+  },
+  "keywords": [
+    "cuda",
+    "vector",
+    "triton"
+  ],
+  "solution": {
+    "protocol": "zip_project",
+    "manifest_file": "agentics.solution.json"
+  },
+  "targets": [
+    {
+      "name": "linux-arm64-cuda",
+      "docker_platform": "linux/arm64",
+      "accelerator": "gpu",
+      "validation_enabled": true,
+      "resource_profile": {
+        "name": "agentics-cuda-cu130-gb10",
+        "solution_image": {
+          "source": "registry",
+          "reference": "ghcr.io/agentic-science/agentics-linux-arm64-cuda:cu130-ubuntu24.04-v0.2.5@sha256:8e3da4a65e297e3b1e9800da001fa2bbac9ed48453a6972117a0c3ad1d1eef13"
+        },
+        "evaluator_image": {
+          "source": "registry",
+          "reference": "ghcr.io/agentic-science/agentics-linux-arm64-cuda:cu130-ubuntu24.04-v0.2.5@sha256:8e3da4a65e297e3b1e9800da001fa2bbac9ed48453a6972117a0c3ad1d1eef13"
+        },
+        "solution": {
+          "setup": {
+            "timeout_sec": 120,
+            "memory_limit_mb": 2048,
+            "cpu_limit_millis": 2000,
+            "disk_limit_mb": 2048,
+            "network_access": "disabled"
+          },
+          "build": {
+            "timeout_sec": 120,
+            "memory_limit_mb": 2048,
+            "cpu_limit_millis": 2000,
+            "disk_limit_mb": 2048,
+            "network_access": "disabled"
+          }
+        },
+        "evaluator": {
+          "setup": {
+            "timeout_sec": 1200,
+            "memory_limit_mb": 6144,
+            "cpu_limit_millis": 4000,
+            "disk_limit_mb": 16384,
+            "network_access": "enabled"
+          },
+          "run": {
+            "timeout_sec": 3600,
+            "memory_limit_mb": 8192,
+            "cpu_limit_millis": 8000,
+            "disk_limit_mb": 8192,
+            "network_access": "disabled"
+          }
+        },
+        "resource_description": "ARM64 CUDA 13.0 profile for Frontier-CS coexecuted GPU benchmarks.",
+        "hardware_metadata": {
+          "kind": "cuda",
+          "gpu_model": "NVIDIA GB10",
+          "gpu_count": 1,
+          "cuda_variant": "cu130",
+          "cuda_version": "13.0",
+          "driver_minimum": "580.142"
+        }
+      }
+    }
+  ],
+  "starts_at": "2026-01-01T00:00:00Z",
+  "eligibility": {
+    "type": "open"
+  },
+  "visibility": {
+    "leaderboard": "public_live",
+    "score_distribution": "public_live",
+    "result_detail": "submitter_live_public_live"
+  },
+  "solution_publication": "public",
+  "execution": {
+    "mode": "coexecuted_benchmark",
+    "coexecuted_evaluator": {
+      "command": [
+        "python",
+        "coexecuted-evaluator/run.py"
+      ],
+      "result_file": "result.json"
+    },
+    "acknowledge_danger": true,
+    "validation_setup": {
+      "command": [
+        "python",
+        "coexecuted-evaluator/setup.py"
+      ],
+      "reproducibility_notes": "Creates a uv-managed project environment under /setup using uv sync; no uv pip interface is used."
+    },
+    "official_evaluation_setup": {
+      "command": [
+        "python",
+        "coexecuted-evaluator/setup.py"
+      ],
+      "reproducibility_notes": "Creates the same uv-managed project environment for official evaluation; private benchmark data is not used for dependency resolution."
+    }
+  },
+  "datasets": {
+    "public_dir": "public",
+    "private_benchmark_dir": "private-benchmark",
+    "public_policy": "full",
+    "private_benchmark_policy": "score_only",
+    "private_benchmark_enabled": true
+  },
+  "metric_schema": {
+    "metrics": [
+      {
+        "name": "score",
+        "label": "Score",
+        "direction": "maximize",
+        "visibility": "public",
+        "metric_description": "Score"
+      },
+      {
+        "name": "score_unbounded",
+        "label": "Unbounded Score",
+        "direction": "maximize",
+        "visibility": "public",
+        "metric_description": "Unbounded Score"
+      },
+      {
+        "name": "correctness",
+        "label": "Correctness",
+        "direction": "maximize",
+        "visibility": "public",
+        "metric_description": "Correctness"
+      },
+      {
+        "name": "geometric_mean_speedup",
+        "label": "Geometric Mean Speedup",
+        "direction": "maximize",
+        "visibility": "public",
+        "metric_description": "Geometric Mean Speedup"
+      },
+      {
+        "name": "passed_tests",
+        "label": "Passed Tests",
+        "direction": "maximize",
+        "visibility": "public",
+        "metric_description": "Passed Tests"
+      }
+    ],
+    "ranking": {
+      "primary_metric_name": "score",
+      "tie_breaker_metric_names": [
+        "score_unbounded",
+        "correctness"
+      ]
+    }
+  }
+}
diff --git a/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/statement.md b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/statement.md
new file mode 100644
index 00000000..d2658e98
--- /dev/null
+++ b/challenges/vector-add-2-28-frontier-cs-vector-add-2-28/v1/statement.md
@@ -0,0 +1,111 @@
+# Vector Addition 2^28 Throughput
+
+Ported from Frontier-CS `research/problems/vector_addition/2_28`.
+
+## Agentics Interface
+
+Submit a ZIP project containing the source interface described below. The trusted evaluator imports or compiles participant code from `/workspace`, so this challenge uses `coexecuted_benchmark` with `acknowledge_danger: true`.
+
+## Public And Official Data
+
+Public validation uses a small deterministic configuration committed under `v1/public`. Official scoring uses the private `official-runs` overlay under `private-benchmark/`.
+
+## Original Statement
+
+Vector Addition Problem - Very Large Vectors (2^28)
+==============================================
+
+Problem Setting
+---------------
+Design and optimize high-performance Triton kernels for vector addition on GPU with very large vectors (268,435,456 elements). This problem focuses on implementing efficient element-wise addition for maximum throughput scenarios.
+
+The challenge involves optimizing:
+- **Memory access patterns**: Efficient loading and storing of large vector data
+- **Block sizing**: Optimal block sizes for large GPU workloads
+- **Memory bandwidth**: Maximizing throughput at scale
+- **Performance benchmarking**: Achieving speedup over PyTorch baseline
+
+This variant tests performance on very large vectors (2^28 = 268,435,456 elements = 1 GB per vector). Requires ~3 GB GPU memory total.
+
+Target
+------
+- **Primary**: Maximize bandwidth (GB/s) over PyTorch baseline (higher is better)
+- **Secondary**: Ensure correctness on large vectors
+- **Tertiary**: Minimize memory overhead
+
+API Specification
+-----------------
+Implement a `Solution` class that returns a Triton kernel implementation:
+
+```python
+class Solution:
+    def solve(self, spec_path: str = None) -> dict:
+        """
+        Returns a dict with either:
+        - {"code": "python_code_string"}
+        - {"program_path": "path/to/kernel.py"}
+        """
+        # Your implementation
+        pass
+```
+
+Your kernel implementation must provide:
+
+```python
+import torch
+import triton
+import triton.language as tl
+
+def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+    """
+    Element-wise addition of two vectors.
+    
+    Args:
+        x: Input tensor of shape (268435456,)
+        y: Input tensor of shape (268435456,)
+    
+    Returns:
+        Output tensor of shape (268435456,) with x + y
+    """
+    pass
+```
+
+API Usage Notes
+---------------
+- The evaluator looks for an `add` function in the module namespace
+- Function must handle vector size of exactly 268,435,456 elements
+- Must use Triton JIT compilation for kernel definition
+- Should optimize for maximum memory bandwidth at scale
+- Input tensors are guaranteed to be contiguous and same size
+- May cause OOM on GPUs with less than 3GB memory
+
+Scoring (0-100)
+---------------
+Performance is measured against a naive CPU baseline and a PyTorch GPU baseline:
+
+```
+target = max(2.0 * (pytorch_bandwidth / cpu_bandwidth), 1.0)
+score = ((custom_bandwidth / cpu_bandwidth - 1.0) / (target - 1.0)) * 100
+
+Where:
+- custom_bandwidth = your solution's bandwidth
+- cpu_bandwidth = naive CPU baseline bandwidth
+- pytorch_bandwidth = PyTorch GPU baseline bandwidth
+- target = the custom/cpu ratio that earns 100 points: twice the PyTorch GPU/CPU ratio (never below 1.0)
+
+Score is clamped to [0, 100] range
+```
+
+- 0 points = CPU baseline performance (custom/cpu = 1x)
+- 50 points = Halfway between CPU baseline and 2x PyTorch performance
+- 100 points = 2x PyTorch GPU performance vs CPU (custom/cpu = 2 * pytorch/cpu)
+
+Evaluation Details
+------------------
+- Tested on vector size: 2^28 = 268,435,456 elements
+- Performance measured in GB/s (bandwidth)
+- Correctness verified with tolerance: rtol=1e-5, atol=1e-8
+- Performance measured using median execution time across 5 samples
+- Requires CUDA backend and GPU support
+- Requires sufficient GPU memory (may OOM on smaller GPUs)
+
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/README.md b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/README.md
new file mode 100644
index 00000000..d8e3b20a
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/README.md
@@ -0,0 +1,3 @@
+# vector-add-2-28-frontier-cs-vector-add-2-28 Smoke Solution
+
+Cheap public-validation smoke solution.
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.solution.json b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.solution.json
new file mode 100644
index 00000000..034defa2
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/agentics.solution.json
@@ -0,0 +1,10 @@
+{
+  "protocol": "zip_project",
+  "protocol_version": 1,
+  "note": "Cheap smoke solution for vector-add-2-28-frontier-cs-vector-add-2-28.",
+  "commands": {
+    "setup": "scripts/setup.sh",
+    "build": "scripts/build.sh",
+    "run": "run.sh"
+  }
+}
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/run.sh b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/run.sh
new file mode 100755
index 00000000..94b6de34
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/run.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+set -eu  # abort on any failing command or unset variable
+exit 0  # smoke solution: no run-time work, just report success
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/build.sh b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/build.sh
new file mode 100755
index 00000000..c72e926c
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/build.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env sh
+set -eu  # no build steps; abort on errors or unset variables if any are added
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/setup.sh b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/setup.sh
new file mode 100755
index 00000000..c72e926c
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/scripts/setup.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env sh
+set -eu  # no setup steps; abort on errors or unset variables if any are added
diff --git a/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/solution.py b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/solution.py
new file mode 100644
index 00000000..58f9af64
--- /dev/null
+++ b/test-solutions/vector-add-2-28-frontier-cs-vector-add-2-28/solution.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+class Solution:  # smoke-test submission: hands back inline source instead of a file path
+    def solve(self, spec_path=None):  # spec_path is accepted but not read
+        return {"code":"""def add(x,y):
+    return x + y
+"""}