agentic-science · ifsheldon · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/...nges/adaptive-impostor-search-frontier-cs-algorithmic-245/v1/interactive-evaluator/run.py b/...nges/adaptive-impostor-search-frontier-cs-algorithmic-245/v1/interactive-evaluator/run.py
@@ -190,7 +190,7 @@ def main() -> int:
                 return 0
 
             report_path = tmp_dir / f"report-{index}.txt"
-            cmd = [str(binary), str(input_path), "/dev/stdin", str(answer_path), str(report_path)]
+            cmd = [str(binary), str(input_path), "/dev/stdout", str(answer_path), str(report_path)]
             completed = subprocess.run(
                 cmd,
                 stdin=sys.stdin.buffer,

diff --git a/challenges/average-permutation-frontier-cs-algorithmic-124/v1/interactive-evaluator/run.py b/challenges/average-permutation-frontier-cs-algorithmic-124/v1/interactive-evaluator/run.py
@@ -190,7 +190,7 @@ def main() -> int:
                 return 0
 
             report_path = tmp_dir / f"report-{index}.txt"
-            cmd = [str(binary), str(input_path), "/dev/stdin", str(answer_path), str(report_path)]
+            cmd = [str(binary), str(input_path), "/dev/stdout", str(answer_path), str(report_path)]
             completed = subprocess.run(
                 cmd,
                 stdin=sys.stdin.buffer,

diff --git a/challenges/big-prize-index-frontier-cs-algorithmic-127/v1/interactive-evaluator/run.py b/challenges/big-prize-index-frontier-cs-algorithmic-127/v1/interactive-evaluator/run.py
@@ -190,7 +190,7 @@ def main() -> int:
                 return 0
 
             report_path = tmp_dir / f"report-{index}.txt"
-            cmd = [str(binary), str(input_path), "/dev/stdin", str(answer_path), str(report_path)]
+            cmd = [str(binary), str(input_path), "/dev/stdout", str(answer_path), str(report_path)]
             completed = subprocess.run(
                 cmd,
                 stdin=sys.stdin.buffer,

diff --git a/challenges/bitwise-or-permutation-frontier-cs-algorithmic-82/README.md b/challenges/bitwise-or-permutation-frontier-cs-algorithmic-82/README.md
@@ -1,21 +1,38 @@
 # Bitwise OR Permutation
 
-This challenge migrates Frontier-CS `algorithmic/problems/82` into an Agentics `separated_evaluator` bundle with the `zip_project` stdin/stdout solution contract.
+This challenge migrates Frontier-CS `algorithmic/problems/82` into an Agentics
+`piped_stdio` interactive bundle with the original Testlib interactor protocol.
 
-Submitted solutions are executed once per run. Each run provides a Frontier-CS-derived benchmark record on stdin, and the solution writes the canonical target answer to stdout. The trusted evaluator compares the submitted output with the run's reference answer after whitespace normalization and reports the average exact-reference score.
+Submitted `zip_project` solutions communicate with the trusted interactive
+evaluator through stdin/stdout. The evaluator owns the hidden permutation,
+answers pairwise bitwise-OR queries, enforces the `4269` query limit, validates
+the final permutation exactly, and reports the source interactor ratio as the
+leaderboard score.
 
 ## Contract
 
-- Read the complete stdin payload for the run.
-- Write the canonical answer tokens to stdout.
-- Whitespace between tokens is ignored, but token values and order must match the reference answer exactly.
+- Read one integer `n` from stdin.
+- Query with `? i j`, where `1 <= i, j <= n` and `i != j`; flush stdout and
+  read `p_i | p_j`.
+- Finish the case with `! p_1 p_2 ... p_n`, where the values form a permutation of `0..n-1`.
+- Agentics may run multiple original Frontier-CS cases in one session. After a
+  final answer, keep reading; a line containing only `0` ends the session.
+- Malformed commands, out-of-range indices, too many queries, EOF, invalid final
+  permutations, or wrong values are judged by the trusted evaluator.
 - Network access is disabled during setup, build, and run.
 
 ## Provenance
 
 - Source path: `algorithmic/problems/82`
 - Original title: Bitwise OR Permutation
-- Original shape: Frontier-CS interactive-style algorithmic benchmark with source config, statement, interactor, and testdata.
-- Agentics mode: `separated_evaluator`.
+- Original shape: Frontier-CS interactive algorithmic benchmark with
+  `config.yaml`, `statement.txt`, `interactor.cc`, hidden permutations, and
+  source optimal query counts.
+- Agentics mode: `piped_stdio`.
+- Trusted evaluator: copied source `interactor.cc` compiled with Testlib inside
+  `interactive-evaluator/run.py`.
 
-Public validation is intentionally tiny. Official Frontier-CS-derived runs and reference answers are supplied through the required private asset `official-runs` and are not committed.
+Public validation is intentionally tiny. Official Frontier-CS hidden
+permutations and optimal query counts must be supplied through the required
+private asset `official-runs` at `private-benchmark/session.json` and are not
+committed.
diff --git a/challenges/bitwise-or-permutation-frontier-cs-algorithmic-82/agentics.challenge.json b/challenges/bitwise-or-permutation-frontier-cs-algorithmic-82/agentics.challenge.json
@@ -4,13 +4,13 @@
   "challenge_name": "bitwise-or-permutation-frontier-cs-algorithmic-82",
   "title": "Bitwise OR Permutation",
   "summary": {
-    "en": "Recover the hidden permutation from Frontier-CS bitwise-or benchmark records.",
-    "zh": "将 Frontier-CS 题目迁移为离线标准输入输出基准：Bitwise OR Permutation。"
+    "en": "Recover a hidden permutation using interactive pairwise bitwise-OR queries.",
+    "zh": "通过交互式两两按位或查询恢复隐藏排列。"
   },
   "keywords": [
     "permutation",
     "bitwise",
-    "offline"
+    "interactive"
   ],
   "readme_path": "README.md",
   "bundle_path": "v1",
@@ -20,9 +20,9 @@
       "kind": "private_benchmark_data",
       "required": true,
       "required_paths": [
-        "private-benchmark/runs.json"
+        "private-benchmark/session.json"
       ],
-      "asset_note": "ZIP overlay containing private Frontier-CS-derived official run manifest and reference answers."
+      "asset_note": "ZIP overlay containing the official Frontier-CS interactive session manifest, hidden permutations, and source optimal query counts."
     }
   ],
   "ci": {

diff --git a/...bitwise-or-permutation-frontier-cs-algorithmic-82/v1/interactive-evaluator/interactor.cpp b/...bitwise-or-permutation-frontier-cs-algorithmic-82/v1/interactive-evaluator/interactor.cpp
@@ -0,0 +1,157 @@
+#include "testlib.h"
+#include <bits/stdc++.h>
+using namespace std;
+
+// ---------- helpers ----------
+// Parse s as a base-10 long long (optional sign, then digits only); x is written only on success.
+static inline bool parse_ll(const string& s, long long& x) {
+    const size_t start = (!s.empty() && (s[0] == '+' || s[0] == '-')) ? 1 : 0;
+    if (start >= s.size()) return false;  // empty string, or a sign with no digits after it
+    const bool all_digits = all_of(s.begin() + start, s.end(), [](char c) { return '0' <= c && c <= '9'; });
+    if (!all_digits) return false;
+    try {
+        x = stoll(s);  // input is sign + digits here, so the only failure left is a thrown exception
+        return true;
+    } catch (...) { return false; }
+}
+// Parse s as a decimal int: false (x untouched) when s is malformed or outside [INT_MIN, INT_MAX].
+static inline bool parse_int32(const string& s, int& x) {
+    long long wide = 0;
+    if (!parse_ll(s, wide) || wide < INT_MIN || wide > INT_MAX) return false;
+    x = static_cast<int>(wide);
+    return true;
+}
+
+// Interactor entry point: loads the hidden permutation from 'inf' and the optimal query count
+// from 'ans', prints n, then answers each "? i j" with p[i] | p[j] until "! p_1 ... p_n" arrives.
+// The command loop has no break; the visible exits are the quitf/quitp calls below.
+int main(int argc, char* argv[]) {
+    setName("Permutation OR interactor (stdout version; optimal-queries + ratio + in-bounds checks)");
+    registerInteraction(argc, argv);
+
+    // ---- Problem constraints ----
+    const int N_MIN = 3;
+    const int N_MAX = 2048;
+    const int QUERY_LIMIT = 4269;            // hard limit for answered queries
+    const int SCORE_BASE  = 4269;            // scoring: (SCORE_BASE - queries) / 10
+
+    // ---- Read hidden instance from 'inf': n and permutation p[1..n] ----
+    int n = inf.readInt(N_MIN, N_MAX, "n");
+    vector<int> p(n + 1);
+    for (int i = 1; i <= n; ++i) {
+        p[i] = inf.readInt(0, n - 1, format("p[%d]", i).c_str());
+    }
+
+    // Validate 'p' is a permutation of [0..n-1]
+    vector<int> cnt(n, 0);
+    for (int i = 1; i <= n; ++i) ++cnt[p[i]];
+    for (int v = 0; v < n; ++v) {
+        ensuref(cnt[v] == 1, "Provided p is not a permutation: value %d occurs %d times.", v, cnt[v]);
+    }
+
+    // ---- Read optimal #queries (what an optimal solution needs) from '.ans' ----
+    int optimal_queries = ans.readInt(0, QUERY_LIMIT, "optimal_queries");
+
+    // ---- Public output: announce n and FLUSH ----
+    cout << n << '\n' << flush;
+    int queries = 0; // number of answered '?' queries
+    // Formats the message, appends both ratio figures, and ends the run through quitp(ratio, ...).
+    auto finalize_with_ratio = [&](double ratio, double unbounded_ratio, const string &fmt, auto... args) {
+        string base = format(fmt.c_str(), args...);
+        quitp(ratio, "%s Ratio: %.4f, RatioUnbounded: %.4f", base.c_str(), ratio, unbounded_ratio);
+    };
+
+    while (true) {
+        // Expect either "?" or "!" from the participant
+        string cmd = ouf.readToken();
+
+        if (cmd == "?") {
+            // Read tokens for i, j and validate manually (in-bounds check)
+            string si = ouf.readToken();
+            string sj = ouf.readToken();
+
+            int i = 0, j = 0;
+            bool okI = parse_int32(si, i);
+            bool okJ = parse_int32(sj, j);
+
+            if (!okI || !okJ) {
+                cout << -1 << '\n' << flush;
+                quitf(_pe, "Invalid query: expected integers for i and j, got '%s' and '%s'.",
+                      compress(si).c_str(), compress(sj).c_str());
+            }
+
+            if (i < 1 || i > n || j < 1 || j > n) {
+                cout << -1 << '\n' << flush;
+                quitf(_wa, "Query out of bounds: i=%d, j=%d (valid range is [1,%d]).", i, j, n);
+            }
+
+            if (i == j) {
+                cout << -1 << '\n' << flush;
+                quitf(_wa, "Invalid query: i == j (%d).", i);
+            }
+
+            // 'queries' counts queries already answered, so refuse once QUERY_LIMIT have been served.
+            if (queries >= QUERY_LIMIT) {
+                cout << -1 << '\n' << flush;
+                finalize_with_ratio(0.0, 0.0, "Query limit exceeded: %d > %d.", queries + 1, QUERY_LIMIT);
+            }
+
+            int answ = p[i] | p[j];
+            cout << answ << '\n' << flush;
+            ++queries;
+        }
+        else if (cmd == "!") {
+            // Read final answer: exactly n integers in [0..n-1]
+            vector<int> guess(n + 1);
+            for (int i = 1; i <= n; ++i) {
+                guess[i] = ouf.readInt(0, n - 1, format("ans[%d]", i).c_str());
+            }
+
+            // Check that guess is a permutation
+            vector<int> cg(n, 0);
+            for (int i = 1; i <= n; ++i) ++cg[guess[i]];
+            for (int v = 0; v < n; ++v) {
+                if (cg[v] != 1) {
+                    finalize_with_ratio(0.0, 0.0,
+                                        "Final sequence is not a permutation of [0..%d]: value %d occurs %d times. Queries used: %d.",
+                                        n - 1, v, cg[v], queries);
+                }
+            }
+
+            // Check exact equality with hidden permutation
+            int first_bad = -1;
+            for (int i = 1; i <= n; ++i) {
+                if (guess[i] != p[i]) { first_bad = i; break; }
+            }
+            if (first_bad != -1) {
+                finalize_with_ratio(0.0, 0.0,
+                                    "Wrong permutation at position %d: got %d, expected %d. Queries used: %d.",
+                                    first_bad, guess[first_bad], p[first_bad], queries);
+            }
+
+            // ---- Compute scores and ratio ----
+            int ai_raw_nonneg  = max(0, SCORE_BASE - queries);
+            int opt_raw_nonneg = max(0, SCORE_BASE - optimal_queries);
+            double ai_score  = ai_raw_nonneg  / 10.0;
+            double opt_score = opt_raw_nonneg / 10.0;
+
+            double ratio, unbounded_ratio = 1.0;
+            if (opt_score <= 0.0) {
+                ratio = (ai_score <= 0.0) ? 1.0 : 0.0; // NOTE(review): positive score vs. zero optimum gives 0 -- confirm intent
+            } else {
+                ratio = ai_score / opt_score;
+                if (ratio < 0.0) ratio = 0.0;
+                unbounded_ratio = max(0.0, ratio);
+                if (ratio > 1.0) ratio = 1.0;
+            }
+
+            finalize_with_ratio(ratio, unbounded_ratio,
+                                "Accepted. Queries used: %d. Your score = (4269 - %d) / 10 = %.1f. "
+                                "Optimal queries: %d. Optimal score = (4269 - %d) / 10 = %.1f.",
+                                queries, queries, ai_score,
+                                optimal_queries, optimal_queries, opt_score);
+        }
+        else {
+            // Unexpected token
+            cout << -1 << '\n' << flush;
+            quitf(_pe, "Expected '?' or '!' but got '%s'.", compress(cmd).c_str());
+        }
+    }
+}