diff --git a/evals/openai-agent/eval.yaml b/evals/openai-agent/eval.yaml
index 33492dca7..5b46e20b3 100644
--- a/evals/openai-agent/eval.yaml
+++ b/evals/openai-agent/eval.yaml
@@ -92,3 +92,13 @@ config:
             toolPattern: ".*"
         minToolCalls: 1
         maxToolCalls: 15
+    # Must-gather archive analysis tasks (offline, no live cluster needed)
+    - glob: ../tasks/mustgather/*/*.yaml
+      labelSelector:
+        suite: mustgather
+      assertions:
+        toolsUsed:
+          - server: kubernetes
+            toolPattern: "mustgather_.*"
+        minToolCalls: 2
+        maxToolCalls: 10
diff --git a/evals/tasks/mustgather/resources-list-nodes/task.yaml b/evals/tasks/mustgather/resources-list-nodes/task.yaml
new file mode 100644
index 000000000..b8ccdf97b
--- /dev/null
+++ b/evals/tasks/mustgather/resources-list-nodes/task.yaml
@@ -0,0 +1,49 @@
+# Eval task: load an extracted must-gather archive and list its Node resources.
+kind: Task
+metadata:
+  labels:
+    suite: mustgather
+  name: resources-list-nodes
+  difficulty: easy
+steps:
+  setup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      MG_DIR="/tmp/mustgather-eval"
+      rm -rf "${MG_DIR}"
+      mkdir -p "${MG_DIR}"
+      echo "Downloading must-gather archive..."
+      # -f: exit non-zero on HTTP errors instead of saving the error page as the tarball.
+      curl -fsSL -o "${MG_DIR}/must-gather.tar" \
+        "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs/periodic-ci-openshift-release-main-nightly-4.22-e2e-metal-ipi-ovn-ipv4/2042251217534455808/artifacts/e2e-metal-ipi-ovn-ipv4/gather-must-gather/artifacts/must-gather.tar"
+      echo "Extracting must-gather archive..."
+      tar xf "${MG_DIR}/must-gather.tar" -C "${MG_DIR}"
+      rm -f "${MG_DIR}/must-gather.tar"
+      echo "Must-gather archive ready at ${MG_DIR}"
+  verify:
+    contains: "master-0"
+  cleanup:
+    inline: |-
+      #!/usr/bin/env bash
+      set -euo pipefail
+      rm -rf /tmp/mustgather-eval
+  prompt:
+    inline: |-
+      I have a must-gather archive extracted at /tmp/mustgather-eval. Please:
+      1. Load the must-gather archive from /tmp/mustgather-eval using the mustgather_use tool
+      2. List all Node resources from the archive using the mustgather_resources_list tool with kind "Node"
+      Report the node names and count.
+# NOTE(review): nesting was rebuilt from a whitespace-collapsed diff; confirm `assertions` belongs at top level (not under `steps`) per the Task schema.
+assertions:
+  toolsUsed:
+    - server: kubernetes
+      toolPattern: "mustgather_use"
+      args:
+        path: "/tmp/mustgather-eval"
+    - server: kubernetes
+      toolPattern: "mustgather_resources_list"
+      args:
+        kind: "Node"
+  minToolCalls: 2
+  maxToolCalls: 5