From 5de8c321501b03d5a0d7f6de6b30898045c3c740 Mon Sep 17 00:00:00 2001
From: Pearson <phenri@openai.com>
Date: Tue, 23 Jun 2026 07:33:52 -0400
Subject: [PATCH 1/3] Add bounded target evidence skill

---
 .../.codex-plugin/plugin.json                 |   2 +-
 plugins/life-science-research/README.md       |   4 +-
 .../skills/research-router-skill/SKILL.md     |   4 +-
 .../research-target-evidence-skill/SKILL.md   |  44 ++
 .../agents/openai.yaml                        |   4 +
 .../scripts/research_target_evidence.py       | 642 ++++++++++++++++++
 .../scripts/test_research_target_evidence.py  | 132 ++++
 7 files changed, 829 insertions(+), 3 deletions(-)
 create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
 create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
 create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
 create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py

diff --git a/plugins/life-science-research/.codex-plugin/plugin.json b/plugins/life-science-research/.codex-plugin/plugin.json
index 40348aa1f..8fae0d7a0 100644
--- a/plugins/life-science-research/.codex-plugin/plugin.json
+++ b/plugins/life-science-research/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "life-science-research",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "description": "General life-sciences research workflows with query routing, evidence synthesis, and optional parallel subagent analysis across genetics, omics, biology, chemistry, structure, clinical evidence, and public dataset discovery.",
   "author": {
     "name": "OpenAI"
diff --git a/plugins/life-science-research/README.md b/plugins/life-science-research/README.md
index 9833b9817..a0495635f 100644
--- a/plugins/life-science-research/README.md
+++ b/plugins/life-science-research/README.md
@@ -45,7 +45,7 @@ This plugin is meant to support workflows like:
 
 ## Skill Families
 
-The plugin currently bundles 50 skills. The most useful way to think about them is by research area rather than as a flat list.
+The plugin currently bundles 51 skills. The most useful way to think about them is by research area rather than as a flat list.
 
 ### Human Genetics And Variant Evidence
 
@@ -94,6 +94,7 @@ The plugin currently bundles 50 skills. The most useful way to think about them
 
 ### Clinical, Translational, And Disease Evidence
 
+- `research-target-evidence-skill`
 - `clinicaltrials-skill`
 - `cbioportal-skill`
 - `civic-skill`
@@ -165,6 +166,7 @@ Each subagent should receive a bounded objective and return concise findings, ca
 - `Map the most plausible causal genes at this inflammatory bowel disease locus and explain why.`
 - `Summarize known structure, ligand, and pathway information for EGFR.`
 - `Pull ClinicalTrials.gov, ChEMBL, and PharmGKB context for JAK inhibitors in alopecia areata.`
+- `Use $research-target-evidence-skill to separate human and preclinical evidence for ROR1 biology, therapeutic programs, and safety.`
 - `Find metabolomics and proteomics resources relevant to MASLD and PPARG.`
 - `Interpret this variant using ClinVar, gnomAD, Ensembl, and cohort association evidence.`
 
diff --git a/plugins/life-science-research/skills/research-router-skill/SKILL.md b/plugins/life-science-research/skills/research-router-skill/SKILL.md
index 9aff213ef..217df06c3 100644
--- a/plugins/life-science-research/skills/research-router-skill/SKILL.md
+++ b/plugins/life-science-research/skills/research-router-skill/SKILL.md
@@ -68,6 +68,8 @@ Choose the smallest set of skills that can answer the question well.
 
 Examples:
 
+- bounded target biology, program, and safety review:
+  `research-target-evidence-skill`
 - target or disease evidence review:
   `opentargets-skill`, `gwas-catalog-skill`, `gtex-eqtl-skill`, `human-protein-atlas-skill`
 - variant interpretation:
@@ -79,7 +81,7 @@ Examples:
 - chemistry and pharmacology:
   `chembl-skill`, `bindingdb-skill`, `pubchem-pug-skill`, `pharmgkb-skill`
 - clinical and translational:
-  `clinicaltrials-skill`, `cbioportal-skill`, `civic-skill`
+  `research-target-evidence-skill`, `clinicaltrials-skill`, `cbioportal-skill`, `civic-skill`
 - literature and dataset discovery:
   `ncbi-entrez-skill`, `ncbi-pmc-skill`, `biorxiv-skill`, `biostudies-arrayexpress-skill`, `ncbi-datasets-skill`
 
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
new file mode 100644
index 000000000..bcea2b860
--- /dev/null
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
@@ -0,0 +1,44 @@
+---
+name: research-target-evidence-skill
+description: Produce a bounded, source-backed evidence brief for a biological target, covering biology, therapeutic programs, human safety, and preclinical evidence. Use when a user asks for a target assessment, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass.
+---
+
+## Research Target Evidence
+
+Use the bundled script exactly once for the requested target. Let it plan,
+deduplicate, pace, cache, and batch requests across PubMed and
+ClinicalTrials.gov.
+
+Do not decompose the request into additional source calls unless the script
+returns `ok=false` or the user explicitly asks for a deeper follow-up.
+
+## Execution
+
+Extract the target and requested evidence axes, then run:
+
+```bash
+python scripts/research_target_evidence.py \
+  --target "<target>" \
+  --questions biology programs safety \
+  --separate-human-preclinical
+```
+
+The script uses a six-hour response cache by default. Use `--cache-mode off`
+when the user explicitly needs a fresh retrieval.
+
+## Synthesis
+
+- Lead with the target-level conclusion and the largest uncertainty.
+- Separate human evidence from preclinical evidence.
+- Cover only modalities supported by the returned papers or trial records.
+- Link each PMID as `[PMID <id>](https://pubmed.ncbi.nlm.nih.gov/<id>/)`.
+- Link each trial as `[<NCT id>](https://clinicaltrials.gov/study/<NCT id>)`.
+- Distinguish observed human toxicity from preclinical or theoretical risk.
+- Treat registry adverse-event counts as non-attributed unless the record says
+  otherwise.
+- Preserve the returned limitations and current registry statuses.
+- Keep the answer concise enough that the evidence hierarchy remains visible.
+
+The retrieval is bounded and relevance-ranked, not a systematic review. Check
+the script's heuristic human/preclinical classification during synthesis. Do
+not include retrieval telemetry in the user-facing brief unless requested.
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
new file mode 100644
index 000000000..06c26d113
--- /dev/null
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
@@ -0,0 +1,4 @@
+interface:
+  display_name: "Research Target Evidence"
+  short_description: "Bounded target biology and safety evidence"
+  default_prompt: "Use $research-target-evidence-skill to summarize target biology, therapeutic programs, and safety evidence."
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
new file mode 100644
index 000000000..531af9606
--- /dev/null
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
@@ -0,0 +1,642 @@
+#!/usr/bin/env python3
+"""Bounded target-evidence retrieval using PubMed and ClinicalTrials.gov."""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import os
+import re
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"  # prefix for the esearch/esummary/efetch calls
+CTGOV_STUDIES = "https://clinicaltrials.gov/api/v2/studies"  # endpoint queried by _query_trials
+USER_AGENT = "research-target-evidence/0.1"  # sent as the User-Agent header on every network request
+
+
+class Retriever:
+    """HTTP GET client with per-host pacing, bounded retries, and a TTL disk cache.
+
+    Every request is logged to ``requests``; cache I/O failures never abort a fetch.
+    """
+
+    def __init__(self, cache_dir: Path, cache_mode: str, ttl_seconds: int) -> None:
+        self.cache_dir = cache_dir
+        self.cache_mode = cache_mode  # "off", "read-only", or "read-write"
+        self.ttl_seconds = ttl_seconds
+        self.requests: list[dict[str, Any]] = []
+        self.last_request_by_host: dict[str, float] = {}
+        if cache_mode != "off":
+            try:
+                cache_dir.mkdir(parents=True, exist_ok=True)
+            except OSError:
+                self.cache_mode = "off"  # unusable directory: run uncached
+
+    def _cache_paths(self, url: str) -> tuple[Path, Path]:
+        """Return the (body, metadata) paths, keyed by the SHA-256 of ``url``."""
+        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
+        return self.cache_dir / f"{digest}.body", self.cache_dir / f"{digest}.json"
+
+    def _read_cache(self, url: str) -> bytes | None:
+        """Return the cached body, or None when absent, expired, or unreadable."""
+        if self.cache_mode not in {"read-only", "read-write"}:
+            return None
+        body_path, meta_path = self._cache_paths(url)
+        try:
+            meta = json.loads(meta_path.read_text(encoding="utf-8"))
+            if time.time() - float(meta.get("saved_at", 0)) > self.ttl_seconds:
+                return None
+            return body_path.read_bytes()
+        except (OSError, ValueError, TypeError, AttributeError):
+            return None  # a missing or corrupt entry is simply a cache miss
+
+    def _write_cache(self, url: str, body: bytes) -> None:
+        """Store ``body`` with its save time; write failures are ignored."""
+        if self.cache_mode != "read-write":
+            return
+        body_path, meta_path = self._cache_paths(url)
+        stamp = json.dumps({"saved_at": time.time()}, sort_keys=True)
+        try:
+            body_path.write_bytes(body)
+            meta_path.write_text(stamp, encoding="utf-8")
+        except OSError:
+            return  # never discard a fetched response over a cache failure
+
+    def _pace(self, host: str) -> None:
+        """Sleep until the per-host minimum interval since the last request passes."""
+        minimum_interval = 0.38 if host.endswith("ncbi.nlm.nih.gov") else 0.05
+        previous = self.last_request_by_host.get(host)
+        elapsed = float("inf") if previous is None else time.monotonic() - previous
+        if elapsed < minimum_interval:
+            time.sleep(minimum_interval - elapsed)
+
+    def _record(
+        self, label: str, host: str, started: float | None, size: int, status: int,
+        attempt: int, error: Exception | None = None,
+    ) -> None:
+        """Append one telemetry record; ``started`` is None for a cache hit."""
+        hit = started is None
+        entry: dict[str, Any] = {
+            "label": label,
+            "host": host,
+            "elapsed_ms": 0 if hit else round((time.monotonic() - started) * 1000),
+            "bytes": size,
+            "status": status,
+            "cache_hit": hit,
+            "attempt": attempt,
+        }
+        if error is not None:
+            entry["error"] = str(error)
+        if not hit:
+            self.last_request_by_host[host] = time.monotonic()
+        self.requests.append(entry)
+
+    def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes:
+        """Return the body for ``base_url`` + ``params``, consulting the cache first.
+
+        Transient failures are retried with backoff; raises RuntimeError on failure.
+        """
+        query = urllib.parse.urlencode(params, doseq=True)
+        url = f"{base_url}?{query}" if query else base_url
+        host = urllib.parse.urlparse(url).netloc
+        cached = self._read_cache(url)
+        if cached is not None:
+            self._record(label, host, None, len(cached), 200, 0)
+            return cached
+        last_error: Exception | None = None
+        max_attempts = 3
+        for attempt in range(1, max_attempts + 1):
+            self._pace(host)
+            started = time.monotonic()
+            try:
+                request = urllib.request.Request(
+                    url, headers={"User-Agent": USER_AGENT}
+                )
+                with urllib.request.urlopen(request, timeout=20) as response:
+                    status = int(response.status)
+                    body = response.read()
+            except urllib.error.HTTPError as exc:
+                status, last_error = exc.code, exc
+            except (urllib.error.URLError, TimeoutError, ConnectionError) as exc:
+                status, last_error = 0, exc  # 0 marks a transport-level failure
+            else:
+                self._record(label, host, started, len(body), status, attempt)
+                self._write_cache(url, body)
+                return body
+            self._record(label, host, started, 0, status, attempt, last_error)
+            if attempt == max_attempts or status not in {0, 429, 500, 502, 503, 504}:
+                break  # final attempt or permanent error: skip the pointless backoff
+            time.sleep(0.8 * (2 ** (attempt - 1)))
+        raise RuntimeError(f"Request failed for {label}: {last_error}")
+
+
+def _ncbi_params(params: dict[str, Any]) -> dict[str, Any]:
+    """Return a copy of ``params`` plus NCBI settings found in the environment."""
+    env = os.environ
+    optional = {
+        "api_key": env.get("NCBI_API_KEY") or env.get("NCBI_EUTILS_API_KEY"),
+        "tool": env.get("NCBI_TOOL"),
+        "email": env.get("NCBI_EMAIL"),
+    }
+    # Unset or empty variables are left out rather than sent as blank fields.
+    return {**params, **{key: value for key, value in optional.items() if value}}
+
+
+def _json_request(
+    retriever: Retriever, base_url: str, params: dict[str, Any], label: str
+) -> dict[str, Any]:  # fetch through ``retriever`` and parse the UTF-8 body as JSON
+    return json.loads(str(retriever.get(base_url, params, label), "utf-8"))
+
+
+def _text(element: ET.Element | None) -> str:
+    """Return the stripped, concatenated text of ``element`` ("" for None)."""
+    pieces = () if element is None else element.itertext()
+    return "".join(pieces).strip()
+
+
+def _year(article: ET.Element) -> str | None:
+    """Return the publication year, or a raw date prefix when no year parses."""
+    candidates = (
+        "Journal/JournalIssue/PubDate/Year",
+        "ArticleDate/Year",
+        "Journal/JournalIssue/PubDate/MedlineDate",
+    )
+    for raw in filter(None, map(article.findtext, candidates)):
+        found = re.search(r"(?:19|20)\d{2}", raw)
+        return found[0] if found else raw[:20]
+    return None
+
+
+def _classify_paper(title: str, abstract: str, publication_types: list[str]) -> str:
+    """Heuristically label a paper as "human", "preclinical", or "other".
+
+    Cue phrases are counted in the lowercased title, abstract, and publication
+    types. A clinical-trial publication type adds 3 to the human score, while a
+    review, editorial, or comment that is not a clinical trial scores 0 for
+    human. "preclinical" is returned only when it strictly outscores "human";
+    "other" means both scores are zero.
+    """
+    human_cues = (
+        "phase 1",
+        "phase i",
+        "phase 2",
+        "phase ii",
+        "patients were",
+        "patients with",
+        "patients received",
+        "patients treated",
+        "participants",
+        "first-in-human",
+        "human cancer",
+        "human tissue",
+    )
+    preclinical_cues = (
+        "mouse",
+        "mice",
+        "murine",
+        "xenograft",
+        "cell line",
+        "in vitro",
+        "in vivo model",
+        "preclinical",
+    )
+    kinds = [kind.lower() for kind in publication_types]
+    haystack = f"{title} {abstract} {' '.join(publication_types)}".lower()
+    # Human cues are searched with "patient-derived" removed from the text.
+    human_haystack = haystack.replace("patient-derived", "")
+    is_trial = any("clinical trial" in kind for kind in kinds)
+    is_commentary = any(kind in {"review", "editorial", "comment"} for kind in kinds)
+    human_score = 0
+    if is_trial or not is_commentary:
+        human_score = 3 * is_trial + sum(cue in human_haystack for cue in human_cues)
+    preclinical_score = sum(cue in haystack for cue in preclinical_cues)
+    if preclinical_score > human_score:
+        return "preclinical"
+    if human_score:
+        return "human"
+    return "preclinical" if preclinical_score else "other"
+
+
+def _parse_pubmed_xml(
+    body: bytes, memberships: dict[str, list[str]]
+) -> list[dict[str, Any]]:  # build one summary record per PubmedArticle element
+    root = ET.fromstring(body)
+    papers: list[dict[str, Any]] = []
+    for node in root.findall("PubmedArticle"):
+        citation = node.find("MedlineCitation")
+        article = citation.find("Article") if citation is not None else None
+        if citation is None or article is None:
+            continue  # skip entries lacking a citation or an article element
+        pmid = citation.findtext("PMID") or ""
+        title = _text(article.find("ArticleTitle"))
+        abstract = " ".join(  # non-empty abstract sections joined with spaces
+            _text(item)
+            for item in article.findall("Abstract/AbstractText")
+            if _text(item)
+        )
+        publication_types = [
+            _text(item)
+            for item in article.findall("PublicationTypeList/PublicationType")
+        ]
+        ids = {  # article identifiers keyed by IdType; only "doi" is read below
+            item.attrib.get("IdType", ""): (item.text or "")
+            for item in node.findall("PubmedData/ArticleIdList/ArticleId")
+        }
+        papers.append(
+            {
+                "pmid": pmid,
+                "title": title,
+                "year": _year(article),
+                "journal": article.findtext("Journal/Title"),
+                "doi": ids.get("doi"),
+                "publication_types": publication_types,
+                "classification": _classify_paper(title, abstract, publication_types),
+                "matched_queries": memberships.get(pmid, []),  # [] when the PMID is not in memberships
+                "abstract_excerpt": abstract[:1200],  # first 1,200 characters only
+                "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
+            }
+        )
+    return papers
+
+
+def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]:
+    """Interleave unique IDs from ``groups`` in rank order, stopping at ``limit``."""
+    picked: dict[str, None] = {}  # insertion-ordered set of the chosen IDs
+    cursors = dict.fromkeys(groups, 0)
+    progressed = True
+    # Each pass takes at most one new ID per group, so no group dominates.
+    while progressed and len(picked) < limit:
+        progressed = False
+        for name, values in groups.items():
+            index = cursors[name]
+            while index < len(values) and values[index] in picked:
+                index += 1
+            cursors[name] = index + 1
+            if index >= len(values):
+                continue
+            picked[values[index]] = None
+            progressed = True
+            if len(picked) >= limit:
+                break
+    return list(picked)
+
+
+def _query_pubmed(
+    retriever: Retriever, target: str, max_papers: int
+) -> tuple[dict[str, str], list[dict[str, Any]]]:  # returns (query plan, parsed papers)
+    query_plan = {  # one PubMed search term per named query
+        "biology": f"{target}[Title/Abstract] AND (biology[Title/Abstract] OR signaling[Title/Abstract] OR expression[Title/Abstract] OR function[Title/Abstract])",
+        "human": f'{target}[Title/Abstract] AND (clinical trial[Publication Type] OR "phase 1"[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])',
+        "modalities": f'{target}[Title/Abstract] AND (antibody[Title/Abstract] OR ADC[Title/Abstract] OR "CAR T"[Title/Abstract] OR "chimeric antigen receptor"[Title/Abstract])',
+        "safety": f'{target}[Title/Abstract] AND ("normal tissue"[Title/Abstract] OR toxicity[Title/Abstract] OR safety[Title/Abstract] OR adverse[Title/Abstract])',
+    }
+    groups: dict[str, list[str]] = {}
+    memberships: dict[str, list[str]] = defaultdict(list)
+    for name, term in query_plan.items():
+        data = _json_request(
+            retriever,
+            f"{EUTILS_BASE}/esearch.fcgi",
+            _ncbi_params(
+                {
+                    "db": "pubmed",
+                    "term": term,
+                    "retmode": "json",
+                    "retmax": 12,  # request at most 12 PMIDs per search
+                    "sort": "relevance",
+                }
+            ),
+            f"pubmed-esearch-{name}",
+        )
+        ids = data.get("esearchresult", {}).get("idlist", [])
+        groups[name] = ids
+        for pmid in ids:
+            memberships[pmid].append(name)  # remember which searches returned each PMID
+
+    candidate_ids = _round_robin_ids(groups, max(max_papers * 3, max_papers))  # candidate pool for ranking
+    if not candidate_ids:
+        return query_plan, []  # nothing to summarize or fetch
+
+    summary = _json_request(
+        retriever,
+        f"{EUTILS_BASE}/esummary.fcgi",
+        _ncbi_params(
+            {"db": "pubmed", "id": ",".join(candidate_ids), "retmode": "json"}
+        ),
+        "pubmed-esummary-selected",
+    )
+    result = summary.get("result", {})
+    target_token = target.lower()
+
+    def rank(pmid: str) -> tuple[int, int]:  # sort key: (score, negated candidate position)
+        title = str(result.get(pmid, {}).get("title") or "").lower()
+        title_score = 6 if target_token in title else 0  # target appears in the title
+        evidence_score = sum(  # one point per evidence keyword found in the title
+            term in title
+            for term in (
+                "clinical",
+                "phase",
+                "patient",
+                "antibody",
+                "car-t",
+                "chimeric antigen receptor",
+                "antibody-drug conjugate",
+                "safety",
+                "normal tissue",
+                "expression",
+            )
+        )
+        return title_score + evidence_score + len(
+            memberships[pmid]
+        ), -candidate_ids.index(pmid)
+
+    selected = sorted(candidate_ids, key=rank, reverse=True)[:max_papers]  # keep the top-ranked PMIDs
+    xml_body = retriever.get(
+        f"{EUTILS_BASE}/efetch.fcgi",
+        _ncbi_params({"db": "pubmed", "id": ",".join(selected), "retmode": "xml"}),
+        "pubmed-efetch-selected",
+    )
+    return query_plan, _parse_pubmed_xml(xml_body, memberships)
+
+
+def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return up to eight serious adverse events, highest affected count first.
+
+    ``numAffected`` is summed over every entry in an event's ``stats``; events
+    whose total is zero are omitted.
+    """
+    adverse = study.get("resultsSection", {}).get("adverseEventsModule", {})
+    rows: list[dict[str, Any]] = []
+    for event in adverse.get("seriousEvents", []):
+        total = 0
+        for stat in event.get("stats", []):
+            total += int(stat.get("numAffected") or 0)
+        if not total:
+            continue
+        row = {"term": event.get("term"), "organ_system": event.get("organSystem")}
+        rows.append({**row, "num_affected": total})
+    # sorted() is stable, so events with equal counts keep their input order.
+    return sorted(rows, key=lambda row: row["num_affected"], reverse=True)[:8]
+
+
+def _trial_relevance(study: dict[str, Any], target: str) -> int:
+    """Score how directly a registry record concerns ``target``.
+
+    A title or intervention match scores 5 each, a summary match 2, and a
+    condition match 1. A study that is not interventional and names the target
+    only in its summary scores 0. Matching is case-insensitive and anchored so
+    the target is not matched inside a longer word.
+
+    Returns:
+        The summed score; 0 means the record has no usable match.
+    """
+    protocol = study.get("protocolSection", {})
+    identification = protocol.get("identificationModule", {})
+    descriptions = protocol.get("descriptionModule", {})
+    interventions = protocol.get("armsInterventionsModule", {}).get("interventions", [])
+    conditions = protocol.get("conditionsModule", {}).get("conditions", [])
+    # Lookarounds instead of \b: \b never matches next to a target that starts
+    # or ends with a non-word character (e.g. "CD8+"), which zeroed every score.
+    target_pattern = re.compile(rf"(?<!\w){re.escape(target)}(?!\w)", re.IGNORECASE)
+    title_fields = ("briefTitle", "officialTitle")
+    summary_fields = ("briefSummary", "detailedDescription")
+    title_text = " ".join(str(identification.get(key) or "") for key in title_fields)
+    intervention_text = " ".join(
+        " ".join([str(item.get("name") or ""), *map(str, item.get("otherNames", []))])
+        for item in interventions
+    )
+    summary_text = " ".join(str(descriptions.get(key) or "") for key in summary_fields)
+    in_title = bool(target_pattern.search(title_text))
+    in_intervention = bool(target_pattern.search(intervention_text))
+    in_summary = bool(target_pattern.search(summary_text))
+    in_condition = bool(target_pattern.search(" ".join(map(str, conditions))))
+    summary_only = in_summary and not (in_title or in_intervention or in_condition)
+    study_type = protocol.get("designModule", {}).get("studyType")
+    if summary_only and study_type != "INTERVENTIONAL":
+        return 0
+    return 5 * in_title + 5 * in_intervention + 2 * in_summary + in_condition
+
+
+def _trial_program_tokens(
+    study: dict[str, Any], *, include_interventions: bool = True
+) -> set[str]:
+    """Collect distinctive lowercase tokens naming a trial's program.
+
+    Tokens come from the brief and official titles and, when
+    ``include_interventions`` is true, from intervention names and aliases.
+    Only tokens of five or more characters that are not in the generic
+    vocabulary below are kept.
+    """
+    generic = {
+        "advanced",
+        "antibody",
+        "cancer",
+        "cells",
+        "clinical",
+        "combination",
+        "escalation",
+        "expansion",
+        "malignancies",
+        "patients",
+        "phase",
+        "solid",
+        "study",
+        "therapy",
+        "treatment",
+        "tumor",
+        "tumors",
+    }
+    protocol = study.get("protocolSection", {})
+    identification = protocol.get("identificationModule", {})
+    interventions = protocol.get("armsInterventionsModule", {}).get("interventions", [])
+    title_keys = ("briefTitle", "officialTitle")
+    parts = [str(identification.get(key) or "") for key in title_keys]
+    if include_interventions:
+        parts += [str(item.get("name") or "") for item in interventions]
+        for item in interventions:
+            parts.extend(map(str, item.get("otherNames", [])))
+    tokens = set(re.findall(r"[a-z0-9][a-z0-9-]{4,}", " ".join(parts).lower()))
+    return tokens - generic
+
+
+def _parse_trials(
+    data: dict[str, Any], target: str, max_trials: int
+) -> list[dict[str, Any]]:  # filter, summarize, and rank registry records for the target
+    trials: list[dict[str, Any]] = []
+    ranked_studies = [
+        (study, _trial_relevance(study, target)) for study in data.get("studies", [])
+    ]
+    direct_program_tokens = set().union(  # title tokens of studies scoring 5 or more
+        *(
+            _trial_program_tokens(study, include_interventions=False)
+            for study, relevance in ranked_studies
+            if relevance >= 5
+        )
+    )
+    for study, relevance in ranked_studies:
+        if relevance == 0 or (  # drop non-matches and summary-only hits sharing no program token
+            relevance == 2  # 2 is the score of a summary-only match
+            and not (_trial_program_tokens(study) & direct_program_tokens)
+        ):
+            continue
+        protocol = study.get("protocolSection", {})
+        identification = protocol.get("identificationModule", {})
+        status = protocol.get("statusModule", {})
+        design = protocol.get("designModule", {})
+        interventions = protocol.get("armsInterventionsModule", {}).get(
+            "interventions", []
+        )
+        nct_id = identification.get("nctId")
+        outcomes = (
+            study.get("resultsSection", {})
+            .get("outcomeMeasuresModule", {})
+            .get("outcomeMeasures", [])
+        )
+        trials.append(
+            {
+                "nct_id": nct_id,
+                "title": identification.get("briefTitle"),
+                "status": status.get("overallStatus"),
+                "why_stopped": status.get("whyStopped"),
+                "phases": design.get("phases", []),
+                "enrollment": design.get("enrollmentInfo"),
+                "conditions": protocol.get("conditionsModule", {}).get(
+                    "conditions", []
+                ),
+                "interventions": [
+                    {
+                        "name": item.get("name"),
+                        "type": item.get("type"),
+                        "other_names": item.get("otherNames", []),
+                    }
+                    for item in interventions[:8]  # at most eight interventions per trial
+                ],
+                "brief_summary": protocol.get("descriptionModule", {}).get(
+                    "briefSummary"
+                ),
+                "has_results": bool(study.get("hasResults")),
+                "primary_outcomes": [
+                    {
+                        "title": outcome.get("title"),
+                        "description": outcome.get("description"),
+                    }
+                    for outcome in outcomes
+                    if outcome.get("type") == "PRIMARY"
+                ][:5],  # at most five primary outcomes
+                "serious_adverse_events": _serious_events(study),
+                "target_relevance": relevance,
+                "url": f"https://clinicaltrials.gov/study/{nct_id}" if nct_id else None,
+            }
+        )
+    trials.sort(  # highest relevance first, then trials with results, then status A-Z
+        key=lambda item: (
+            -item["target_relevance"],
+            not item["has_results"],
+            item["status"] or "",
+        )
+    )
+    return trials[:max_trials]
+
+
+def _query_trials(
+    retriever: Retriever, target: str, max_trials: int
+) -> tuple[int | None, list[dict[str, Any]]]:  # returns (reported total count, parsed trials)
+    data = _json_request(
+        retriever,
+        CTGOV_STUDIES,
+        {
+            "query.term": target,
+            "pageSize": 100,  # a single request is made; no further pages are fetched
+            "countTotal": "true",
+            "format": "json",
+        },
+        "clinicaltrials-target-search",
+    )
+    return data.get("totalCount"), _parse_trials(data, target, max_trials)
+
+
+def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]:
+    """Summarize the request log; ``started`` is the run's ``time.monotonic()``."""
+    log = retriever.requests
+    uncached = [entry for entry in log if not entry["cache_hit"]]
+    return {
+        "elapsed_seconds": round(time.monotonic() - started, 3),
+        "request_attempts": len(log),
+        "network_requests": len(uncached),
+        "cache_hits": sum(entry["cache_hit"] for entry in log),
+        "retries": sum(entry["attempt"] > 1 for entry in log),
+        "rate_limit_events": sum(entry["status"] == 429 for entry in log),
+        "bytes_received": sum(entry["bytes"] for entry in uncached),
+        "requests": log,
+    }
+
+
+def parse_args() -> argparse.Namespace:  # define and parse the command-line options
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--target", required=True)
+    parser.add_argument(  # main() copies these into the output under "questions"
+        "--questions", nargs="+", default=["biology", "programs", "safety"]
+    )
+    parser.add_argument("--separate-human-preclinical", action="store_true")
+    parser.add_argument("--max-papers", type=int, default=14)
+    parser.add_argument("--max-trials", type=int, default=20)
+    parser.add_argument(
+        "--cache-mode", choices=["off", "read-only", "read-write"], default="read-write"
+    )
+    parser.add_argument("--cache-ttl-seconds", type=int, default=21600)  # 21600 s = six hours
+    return parser.parse_args()
+
+
+def main() -> int:  # run both retrievals, print one JSON document, return the exit code
+    args = parse_args()
+    started = time.monotonic()
+    cache_dir = Path.home() / ".cache" / "codex" / "research-target-evidence"
+    retriever = Retriever(cache_dir, args.cache_mode, args.cache_ttl_seconds)
+    try:
+        query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers)
+        total_trials, trials = _query_trials(retriever, args.target, args.max_trials)
+        grouped_papers = {  # papers bucketed by their heuristic classification
+            group: [paper for paper in papers if paper["classification"] == group]
+            for group in ("human", "preclinical", "other")
+        }
+        output = {
+            "ok": True,
+            "target": args.target,
+            "questions": args.questions,
+            "separate_human_preclinical": args.separate_human_preclinical,
+            "query_plan": query_plan,
+            "papers": grouped_papers,
+            "trials": {
+                "total_count": total_trials,
+                "records": trials,
+            },
+            "limitations": [
+                "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.",
+                "Paper classification is heuristic and should be checked during synthesis.",
+                "ClinicalTrials.gov event counts are not automatically treatment-attributed.",
+                "Current program status is limited to the retrieved registry snapshot.",
+            ],
+            "telemetry": _telemetry(retriever, started),
+        }
+    except Exception as exc:  # noqa: BLE001
+        output = {  # failure payload: the error plus whatever telemetry was collected
+            "ok": False,
+            "target": args.target,
+            "error": {"type": type(exc).__name__, "message": str(exc)},
+            "telemetry": _telemetry(retriever, started),
+        }
+    json.dump(output, sys.stdout, separators=(",", ":"), ensure_ascii=True)
+    sys.stdout.write("\n")
+    return 0 if output["ok"] else 1  # exit code 1 when the run failed
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
new file mode 100644
index 000000000..43e335896
--- /dev/null
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import importlib.util
+import json
+import tempfile
+import unittest
+from pathlib import Path
+
+SCRIPT_PATH = Path(__file__).with_name("research_target_evidence.py")
+SPEC = importlib.util.spec_from_file_location("research_target_evidence", SCRIPT_PATH)
+assert SPEC and SPEC.loader
+research_target_evidence = importlib.util.module_from_spec(SPEC)
+SPEC.loader.exec_module(research_target_evidence)
+
+
+def trial(
+    nct_id: str,
+    title: str,
+    *,
+    official_title: str = "",
+    summary: str = "",
+    intervention: str = "",
+) -> dict:
+    return {
+        "hasResults": False,
+        "protocolSection": {
+            "identificationModule": {
+                "nctId": nct_id,
+                "briefTitle": title,
+                "officialTitle": official_title,
+            },
+            "statusModule": {"overallStatus": "COMPLETED"},
+            "designModule": {
+                "studyType": "INTERVENTIONAL",
+                "phases": ["PHASE1"],
+                "enrollmentInfo": {"count": 10, "type": "ACTUAL"},
+            },
+            "descriptionModule": {"briefSummary": summary},
+            "conditionsModule": {"conditions": ["Cancer"]},
+            "armsInterventionsModule": {
+                "interventions": (
+                    [{"name": intervention, "type": "DRUG", "otherNames": []}]
+                    if intervention
+                    else []
+                )
+            },
+        },
+    }
+
+
+class PaperClassificationTests(unittest.TestCase):
+    def test_patient_derived_xenograft_is_preclinical(self) -> None:
+        classification = research_target_evidence._classify_paper(
+            "Target activity in patient-derived xenografts",
+            "The treatment reduced growth in mice and cell lines.",
+            ["Journal Article"],
+        )
+
+        self.assertEqual(classification, "preclinical")
+
+    def test_phase_one_trial_is_human(self) -> None:
+        classification = research_target_evidence._classify_paper(
+            "Phase I target study",
+            "Patients with advanced cancer received treatment.",
+            ["Clinical Trial, Phase I"],
+        )
+
+        self.assertEqual(classification, "human")
+
+    def test_review_is_context_not_direct_human_evidence(self) -> None:
+        classification = research_target_evidence._classify_paper(
+            "Target review",
+            "This review discusses phase I studies and patients with cancer.",
+            ["Review"],
+        )
+
+        self.assertEqual(classification, "other")
+
+
+class TrialFilteringTests(unittest.TestCase):
+    def test_summary_only_record_requires_a_known_program_alias(self) -> None:
+        records = research_target_evidence._parse_trials(
+            {
+                "studies": [
+                    trial(
+                        "NCT00000001",
+                        "Study of Cirmtuzumab",
+                        official_title="A ROR1-targeted antibody study",
+                        intervention="Cirmtuzumab",
+                    ),
+                    trial(
+                        "NCT00000002",
+                        "Cirmtuzumab extension study",
+                        summary="The antibody binds ROR1.",
+                        intervention="Cirmtuzumab",
+                    ),
+                    trial(
+                        "NCT00000003",
+                        "Broad sequencing study",
+                        summary="The panel includes ROR1 among many genes.",
+                        intervention="Genome sequencing",
+                    ),
+                ]
+            },
+            "ROR1",
+            10,
+        )
+
+        self.assertEqual(
+            [record["nct_id"] for record in records],
+            ["NCT00000001", "NCT00000002"],
+        )
+
+
+class CacheTests(unittest.TestCase):
+    def test_cache_metadata_does_not_persist_request_url(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            retriever = research_target_evidence.Retriever(
+                Path(directory), "read-write", 60
+            )
+            url = "https://example.test/data?api_key=secret"
+
+            retriever._write_cache(url, b"payload")
+
+            _, metadata_path = retriever._cache_paths(url)
+            metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
+            self.assertEqual(set(metadata), {"saved_at"})
+
+
+if __name__ == "__main__":
+    unittest.main()

From b6861935b5891888da960c981c4ece3f28d6b27c Mon Sep 17 00:00:00 2001
From: Pearson <phenri@openai.com>
Date: Tue, 23 Jun 2026 07:41:11 -0400
Subject: [PATCH 2/3] Remove target evidence cache

---
 .../research-target-evidence-skill/SKILL.md   |  5 +-
 .../scripts/research_target_evidence.py       | 68 ++-----------------
 .../scripts/test_research_target_evidence.py  | 46 +++++++++----
 3 files changed, 36 insertions(+), 83 deletions(-)

diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
index bcea2b860..a2ac620e6 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
@@ -6,7 +6,7 @@ description: Produce a bounded, source-backed evidence brief for a biological ta
 ## Research Target Evidence
 
 Use the bundled script exactly once for the requested target. Let it plan,
-deduplicate, pace, cache, and batch requests across PubMed and
+deduplicate, pace, and batch requests across PubMed and
 ClinicalTrials.gov.
 
 Do not decompose the request into additional source calls unless the script
@@ -23,9 +23,6 @@ python scripts/research_target_evidence.py \
   --separate-human-preclinical
 ```
 
-The script uses a six-hour response cache by default. Use `--cache-mode off`
-when the user explicitly needs a fresh retrieval.
-
 ## Synthesis
 
 - Lead with the target-level conclusion and the largest uncertainty.
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
index 531af9606..87300cc33 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 import argparse
-import hashlib
 import json
 import os
 import re
@@ -15,7 +14,6 @@
 import urllib.request
 import xml.etree.ElementTree as ET
 from collections import defaultdict
-from pathlib import Path
 from typing import Any
 
 EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
@@ -24,42 +22,9 @@
 
 
 class Retriever:
-    def __init__(self, cache_dir: Path, cache_mode: str, ttl_seconds: int) -> None:
-        self.cache_dir = cache_dir
-        self.cache_mode = cache_mode
-        self.ttl_seconds = ttl_seconds
+    def __init__(self) -> None:
         self.requests: list[dict[str, Any]] = []
         self.last_request_by_host: dict[str, float] = {}
-        if cache_mode != "off":
-            cache_dir.mkdir(parents=True, exist_ok=True)
-
-    def _cache_paths(self, url: str) -> tuple[Path, Path]:
-        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
-        return self.cache_dir / f"{digest}.body", self.cache_dir / f"{digest}.json"
-
-    def _read_cache(self, url: str) -> bytes | None:
-        if self.cache_mode not in {"read-only", "read-write"}:
-            return None
-        body_path, meta_path = self._cache_paths(url)
-        if not body_path.exists() or not meta_path.exists():
-            return None
-        try:
-            meta = json.loads(meta_path.read_text(encoding="utf-8"))
-        except (OSError, json.JSONDecodeError):
-            return None
-        if time.time() - float(meta.get("saved_at", 0)) > self.ttl_seconds:
-            return None
-        return body_path.read_bytes()
-
-    def _write_cache(self, url: str, body: bytes) -> None:
-        if self.cache_mode != "read-write":
-            return
-        body_path, meta_path = self._cache_paths(url)
-        body_path.write_bytes(body)
-        meta_path.write_text(
-            json.dumps({"saved_at": time.time()}, sort_keys=True),
-            encoding="utf-8",
-        )
 
     def _pace(self, host: str) -> None:
         minimum_interval = 0.38 if host.endswith("ncbi.nlm.nih.gov") else 0.05
@@ -70,21 +35,6 @@ def _pace(self, host: str) -> None:
     def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes:
         query = urllib.parse.urlencode(params, doseq=True)
         url = f"{base_url}?{query}" if query else base_url
-        cached = self._read_cache(url)
-        if cached is not None:
-            self.requests.append(
-                {
-                    "label": label,
-                    "host": urllib.parse.urlparse(url).netloc,
-                    "elapsed_ms": 0,
-                    "bytes": len(cached),
-                    "status": 200,
-                    "cache_hit": True,
-                    "attempt": 0,
-                }
-            )
-            return cached
-
         host = urllib.parse.urlparse(url).netloc
         last_error: Exception | None = None
         for attempt in range(1, 4):
@@ -108,11 +58,9 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes:
                         "elapsed_ms": elapsed_ms,
                         "bytes": len(body),
                         "status": status,
-                        "cache_hit": False,
                         "attempt": attempt,
                     }
                 )
-                self._write_cache(url, body)
                 return body
             except urllib.error.HTTPError as exc:
                 status = exc.code
@@ -128,7 +76,6 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes:
                     "elapsed_ms": elapsed_ms,
                     "bytes": len(body),
                     "status": status,
-                    "cache_hit": False,
                     "attempt": attempt,
                     "error": str(last_error),
                 }
@@ -564,17 +511,15 @@ def _query_trials(
 
 
 def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]:
-    network = [request for request in retriever.requests if not request["cache_hit"]]
     return {
         "elapsed_seconds": round(time.monotonic() - started, 3),
         "request_attempts": len(retriever.requests),
-        "network_requests": len(network),
-        "cache_hits": sum(request["cache_hit"] for request in retriever.requests),
+        "network_requests": len(retriever.requests),
         "retries": sum(request["attempt"] > 1 for request in retriever.requests),
         "rate_limit_events": sum(
             request["status"] == 429 for request in retriever.requests
         ),
-        "bytes_received": sum(request["bytes"] for request in network),
+        "bytes_received": sum(request["bytes"] for request in retriever.requests),
         "requests": retriever.requests,
     }
 
@@ -588,18 +533,13 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--separate-human-preclinical", action="store_true")
     parser.add_argument("--max-papers", type=int, default=14)
     parser.add_argument("--max-trials", type=int, default=20)
-    parser.add_argument(
-        "--cache-mode", choices=["off", "read-only", "read-write"], default="read-write"
-    )
-    parser.add_argument("--cache-ttl-seconds", type=int, default=21600)
     return parser.parse_args()
 
 
 def main() -> int:
     args = parse_args()
     started = time.monotonic()
-    cache_dir = Path.home() / ".cache" / "codex" / "research-target-evidence"
-    retriever = Retriever(cache_dir, args.cache_mode, args.cache_ttl_seconds)
+    retriever = Retriever()
     try:
         query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers)
         total_trials, trials = _query_trials(retriever, args.target, args.max_trials)
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
index 43e335896..a8f5d8d96 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
@@ -2,8 +2,7 @@
 from __future__ import annotations
 
 import importlib.util
-import json
-import tempfile
+import time
 import unittest
 from pathlib import Path
 
@@ -113,19 +112,36 @@ def test_summary_only_record_requires_a_known_program_alias(self) -> None:
         )
 
 
-class CacheTests(unittest.TestCase):
-    def test_cache_metadata_does_not_persist_request_url(self) -> None:
-        with tempfile.TemporaryDirectory() as directory:
-            retriever = research_target_evidence.Retriever(
-                Path(directory), "read-write", 60
-            )
-            url = "https://example.test/data?api_key=secret"
-
-            retriever._write_cache(url, b"payload")
-
-            _, metadata_path = retriever._cache_paths(url)
-            metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
-            self.assertEqual(set(metadata), {"saved_at"})
+class TelemetryTests(unittest.TestCase):
+    def test_telemetry_reports_request_metrics(self) -> None:
+        retriever = research_target_evidence.Retriever()
+        retriever.requests = [
+            {
+                "label": "example",
+                "host": "example.test",
+                "elapsed_ms": 10,
+                "bytes": 100,
+                "status": 200,
+                "attempt": 1,
+            }
+        ]
+
+        telemetry = research_target_evidence._telemetry(retriever, time.monotonic())
+
+        self.assertEqual(telemetry["network_requests"], 1)
+        self.assertEqual(telemetry["bytes_received"], 100)
+        self.assertEqual(
+            set(telemetry),
+            {
+                "elapsed_seconds",
+                "request_attempts",
+                "network_requests",
+                "retries",
+                "rate_limit_events",
+                "bytes_received",
+                "requests",
+            },
+        )
 
 
 if __name__ == "__main__":

From e68755c1bb29dd450f7f81ce79b6a2df79c5da6c Mon Sep 17 00:00:00 2001
From: Pearson <phenri@openai.com>
Date: Tue, 23 Jun 2026 11:26:55 -0400
Subject: [PATCH 3/3] Add multi-target evidence comparison

---
 .../research-target-evidence-skill/SKILL.md   |  29 +-
 .../agents/openai.yaml                        |   4 +-
 .../scripts/research_target_evidence.py       | 458 +++++++++++++++---
 .../scripts/test_research_target_evidence.py  | 229 ++++++++-
 4 files changed, 633 insertions(+), 87 deletions(-)

diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
index a2ac620e6..4de35d090 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: research-target-evidence-skill
-description: Produce a bounded, source-backed evidence brief for a biological target, covering biology, therapeutic programs, human safety, and preclinical evidence. Use when a user asks for a target assessment, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass.
+description: Produce bounded, source-backed evidence briefs for one or more biological targets, covering biology, therapeutic programs, human safety, preclinical evidence, and cross-target comparison. Use when a user asks for a target assessment, target comparison, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass.
 ---
 
 ## Research Target Evidence
 
-Use the bundled script exactly once for the requested target. Let it plan,
-deduplicate, pace, and batch requests across PubMed and
+Use the bundled script exactly once for all requested targets. Let it plan,
+deduplicate, globally pace, and batch requests across PubMed and
 ClinicalTrials.gov.
 
 Do not decompose the request into additional source calls unless the script
@@ -14,7 +14,7 @@ returns `ok=false` or the user explicitly asks for a deeper follow-up.
 
 ## Execution
 
-Extract the target and requested evidence axes, then run:
+For one target, run:
 
 ```bash
 python scripts/research_target_evidence.py \
@@ -23,10 +23,29 @@ python scripts/research_target_evidence.py \
   --separate-human-preclinical
 ```
 
+For a comparison, repeat `--target` in the same command:
+
+```bash
+python scripts/research_target_evidence.py \
+  --target "<target 1>" \
+  --target "<target 2>" \
+  --target "<target 3>" \
+  --mode compare \
+  --questions biology programs safety \
+  --separate-human-preclinical
+```
+
+Do not run one process per target. The shared process enforces global source
+pacing, preserves landmark-program and evidence-class quotas, and emits a
+single size-bounded JSON result.
+
 ## Synthesis
 
 - Lead with the target-level conclusion and the largest uncertainty.
 - Separate human evidence from preclinical evidence.
+- For comparisons, use the same evidence axes for every target and finish with
+  a compact comparison of validation, selectivity, safety, modality maturity,
+  and uncertainty.
 - Cover only modalities supported by the returned papers or trial records.
 - Link each PMID as `[PMID <id>](https://pubmed.ncbi.nlm.nih.gov/<id>/)`.
 - Link each trial as `[<NCT id>](https://clinicaltrials.gov/study/<NCT id>)`.
@@ -34,6 +53,8 @@ python scripts/research_target_evidence.py \
 - Treat registry adverse-event counts as non-attributed unless the record says
   otherwise.
 - Preserve the returned limitations and current registry statuses.
+- Report per-target errors or omitted evidence counts rather than silently
+  treating a partial result as complete.
 - Keep the answer concise enough that the evidence hierarchy remains visible.
 
 The retrieval is bounded and relevance-ranked, not a systematic review. Check
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
index 06c26d113..590c8c94b 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
   display_name: "Research Target Evidence"
-  short_description: "Bounded target biology and safety evidence"
-  default_prompt: "Use $research-target-evidence-skill to summarize target biology, therapeutic programs, and safety evidence."
+  short_description: "Bounded target evidence and comparison"
+  default_prompt: "Use $research-target-evidence-skill to assess or compare target biology, therapeutic programs, and safety evidence."
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
index 87300cc33..0191545b2 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py
@@ -18,7 +18,14 @@
 
 EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
 CTGOV_STUDIES = "https://clinicaltrials.gov/api/v2/studies"
-USER_AGENT = "research-target-evidence/0.1"
+USER_AGENT = "research-target-evidence/0.2"
+MAX_TARGETS = 6
+DEFAULT_OUTPUT_CHARS = 30_000
+
+
+def _safe_error(error: Exception | None) -> str:
+    message = str(error or "unknown error")
+    return re.sub(r"([?&]api_key=)[^&\s]+", r"\1<redacted>", message)
 
 
 class Retriever:
@@ -77,13 +84,13 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes:
                     "bytes": len(body),
                     "status": status,
                     "attempt": attempt,
-                    "error": str(last_error),
+                    "error": _safe_error(last_error),
                 }
             )
             if status not in {429, 500, 502, 503, 504} and status != 0:
                 break
             time.sleep(0.8 * (2 ** (attempt - 1)))
-        raise RuntimeError(f"Request failed for {label}: {last_error}")
+        raise RuntimeError(f"Request failed for {label}: {_safe_error(last_error)}")
 
 
 def _ncbi_params(params: dict[str, Any]) -> dict[str, Any]:
@@ -123,6 +130,43 @@ def _year(article: ET.Element) -> str | None:
     return None
 
 
+def _clip(text: str, limit: int) -> str:
+    normalized = " ".join(text.split())
+    if len(normalized) <= limit:
+        return normalized
+    clipped = normalized[: max(0, limit - 1)].rsplit(" ", 1)[0]
+    return f"{clipped}..." if clipped else normalized[:limit]
+
+
+def _best_excerpt(
+    abstract: str,
+    terms: tuple[str, ...],
+    limit: int = 350,
+    *,
+    require_term: bool = False,
+) -> str:
+    sentences = [
+        sentence.strip()
+        for sentence in re.split(r"(?<=[.!?])\s+", " ".join(abstract.split()))
+        if sentence.strip()
+    ]
+    if not sentences:
+        return ""
+
+    def score(sentence: str) -> tuple[int, int, int]:
+        lowered = sentence.lower()
+        term_score = sum(term in lowered for term in terms)
+        numeric_score = int(bool(re.search(r"\b\d+(?:\.\d+)?%?\b", sentence)))
+        return term_score, numeric_score, -sentences.index(sentence)
+
+    best = max(sentences, key=score)
+    if require_term and score(best)[0] == 0:
+        return ""
+    if score(best)[:2] == (0, 0):
+        best = sentences[0]
+    return _clip(best, limit)
+
+
 def _classify_paper(title: str, abstract: str, publication_types: list[str]) -> str:
     text = f"{title} {abstract} {' '.join(publication_types)}".lower()
     human_text = text.replace("patient-derived", "")
@@ -205,16 +249,75 @@ def _parse_pubmed_xml(
                 "year": _year(article),
                 "journal": article.findtext("Journal/Title"),
                 "doi": ids.get("doi"),
-                "publication_types": publication_types,
                 "classification": _classify_paper(title, abstract, publication_types),
                 "matched_queries": memberships.get(pmid, []),
-                "abstract_excerpt": abstract[:1200],
+                "result_excerpt": _best_excerpt(
+                    abstract,
+                    (
+                        "response",
+                        "survival",
+                        "progression",
+                        "efficacy",
+                        "randomized",
+                        "patients",
+                        "participants",
+                        "objective",
+                        "complete remission",
+                        "partial remission",
+                        "did not",
+                        "no response",
+                    ),
+                ),
+                "safety_excerpt": _best_excerpt(
+                    abstract,
+                    (
+                        "adverse",
+                        "safety",
+                        "toxicity",
+                        "cytokine",
+                        "neutropenia",
+                        "diarrhea",
+                        "nausea",
+                        "vomiting",
+                        "neuropathy",
+                        "pneumonitis",
+                        "death",
+                        "grade 3",
+                        "grade 4",
+                        "tolerated",
+                    ),
+                    require_term=True,
+                ),
                 "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
             }
         )
     return papers
 
 
+def _select_paper_cards(
+    papers: list[dict[str, Any]], max_papers: int
+) -> list[dict[str, Any]]:
+    selected: list[dict[str, Any]] = []
+
+    def take(predicate: Any) -> None:
+        paper = next(
+            (item for item in papers if item not in selected and predicate(item)), None
+        )
+        if paper is not None and len(selected) < max_papers:
+            selected.append(paper)
+
+    take(lambda item: "landmark" in item["matched_queries"])
+    take(lambda item: "landmark" in item["matched_queries"])
+    take(lambda item: item["classification"] == "human")
+    take(lambda item: "safety" in item["matched_queries"])
+    take(lambda item: item["classification"] == "preclinical")
+    take(lambda item: "biology" in item["matched_queries"])
+    for paper in papers:
+        if paper not in selected and len(selected) < max_papers:
+            selected.append(paper)
+    return selected
+
+
 def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]:
     selected: list[str] = []
     seen: set[str] = set()
@@ -238,13 +341,16 @@ def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]:
 
 
 def _query_pubmed(
-    retriever: Retriever, target: str, max_papers: int
-) -> tuple[dict[str, str], list[dict[str, Any]]]:
+    retriever: Retriever,
+    target: str,
+    max_papers: int,
+) -> tuple[dict[str, str], list[dict[str, Any]], int]:
     query_plan = {
         "biology": f"{target}[Title/Abstract] AND (biology[Title/Abstract] OR signaling[Title/Abstract] OR expression[Title/Abstract] OR function[Title/Abstract])",
-        "human": f'{target}[Title/Abstract] AND (clinical trial[Publication Type] OR "phase 1"[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])',
+        "human": f"{target}[Title/Abstract] AND (clinical trial[Publication Type] OR phase[Title/Abstract] OR randomized[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])",
         "modalities": f'{target}[Title/Abstract] AND (antibody[Title/Abstract] OR ADC[Title/Abstract] OR "CAR T"[Title/Abstract] OR "chimeric antigen receptor"[Title/Abstract])',
         "safety": f'{target}[Title/Abstract] AND ("normal tissue"[Title/Abstract] OR toxicity[Title/Abstract] OR safety[Title/Abstract] OR adverse[Title/Abstract])',
+        "landmark": f'{target}[Title/Abstract] AND (randomized[Title/Abstract] OR randomised[Title/Abstract] OR "phase 2"[Title/Abstract] OR "phase II"[Title/Abstract] OR "phase 3"[Title/Abstract] OR "phase III"[Title/Abstract] OR terminated[Title/Abstract] OR failed[Title/Abstract])',
     }
     groups: dict[str, list[str]] = {}
     memberships: dict[str, list[str]] = defaultdict(list)
@@ -257,7 +363,7 @@ def _query_pubmed(
                     "db": "pubmed",
                     "term": term,
                     "retmode": "json",
-                    "retmax": 12,
+                    "retmax": 30 if name == "landmark" else 12,
                     "sort": "relevance",
                 }
             ),
@@ -268,9 +374,10 @@ def _query_pubmed(
         for pmid in ids:
             memberships[pmid].append(name)
 
-    candidate_ids = _round_robin_ids(groups, max(max_papers * 3, max_papers))
+    available_count = len(memberships)
+    candidate_ids = _round_robin_ids(groups, max(max_papers * 10, 80))
     if not candidate_ids:
-        return query_plan, []
+        return query_plan, [], available_count
 
     summary = _json_request(
         retriever,
@@ -282,6 +389,9 @@ def _query_pubmed(
     )
     result = summary.get("result", {})
     target_token = target.lower()
+    candidate_positions = {
+        pmid: position for position, pmid in enumerate(candidate_ids)
+    }
 
     def rank(pmid: str) -> tuple[int, int]:
         title = str(result.get(pmid, {}).get("title") or "").lower()
@@ -303,15 +413,49 @@ def rank(pmid: str) -> tuple[int, int]:
         )
         return title_score + evidence_score + len(
             memberships[pmid]
-        ), -candidate_ids.index(pmid)
+        ), -candidate_positions.get(pmid, len(candidate_ids))
 
-    selected = sorted(candidate_ids, key=rank, reverse=True)[:max_papers]
+    def landmark_rank(pmid: str) -> tuple[int, int]:
+        title = str(result.get(pmid, {}).get("title") or "").lower()
+        evidence_score = sum(
+            weight * (term in title)
+            for term, weight in (
+                ("randomized", 5),
+                ("randomised", 5),
+                ("placebo", 3),
+                ("phase iii", 3),
+                ("phase 3", 3),
+                ("phase ii", 2),
+                ("phase 2", 2),
+                ("terminated", 2),
+                ("failed", 2),
+            )
+        )
+        return evidence_score, -candidate_positions.get(pmid, len(candidate_ids))
+
+    ranked = sorted(candidate_ids, key=rank, reverse=True)
+    fetch_limit = min(len(ranked), max(max_papers * 2, max_papers))
+    selected: list[str] = []
+    for group_name in ("landmark", "human", "safety", "biology", "modalities"):
+        group_ranked = sorted(
+            groups.get(group_name, []),
+            key=landmark_rank if group_name == "landmark" else rank,
+            reverse=True,
+        )
+        first = next((pmid for pmid in group_ranked if pmid not in selected), None)
+        if first:
+            selected.append(first)
+    for pmid in ranked:
+        if pmid not in selected and len(selected) < fetch_limit:
+            selected.append(pmid)
+    selected = selected[:fetch_limit]
     xml_body = retriever.get(
         f"{EUTILS_BASE}/efetch.fcgi",
         _ncbi_params({"db": "pubmed", "id": ",".join(selected), "retmode": "xml"}),
         "pubmed-efetch-selected",
     )
-    return query_plan, _parse_pubmed_xml(xml_body, memberships)
+    papers = _parse_pubmed_xml(xml_body, memberships)
+    return query_plan, _select_paper_cards(papers, max_papers), available_count
 
 
 def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]:
@@ -331,7 +475,7 @@ def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]:
                 }
             )
     summarized.sort(key=lambda item: item["num_affected"], reverse=True)
-    return summarized[:8]
+    return summarized[:5]
 
 
 def _trial_relevance(study: dict[str, Any], target: str) -> int:
@@ -345,7 +489,13 @@ def _trial_relevance(study: dict[str, Any], target: str) -> int:
         for field in ("briefTitle", "officialTitle")
     )
     intervention_text = " ".join(
-        " ".join([str(item.get("name") or ""), *map(str, item.get("otherNames", []))])
+        " ".join(
+            [
+                str(item.get("name") or ""),
+                *map(str, item.get("otherNames", [])),
+                str(item.get("description") or ""),
+            ]
+        )
         for item in interventions
     )
     summary_text = " ".join(
@@ -414,6 +564,18 @@ def _trial_program_tokens(
     }
 
 
+def _trial_phase_score(study: dict[str, Any]) -> int:
+    phases = study.get("protocolSection", {}).get("designModule", {}).get("phases", [])
+    scores = {
+        "EARLY_PHASE1": 1,
+        "PHASE1": 1,
+        "PHASE2": 2,
+        "PHASE3": 3,
+        "PHASE4": 4,
+    }
+    return max((scores.get(phase, 0) for phase in phases), default=0)
+
+
 def _parse_trials(
     data: dict[str, Any], target: str, max_trials: int
 ) -> list[dict[str, Any]]:
@@ -455,38 +617,36 @@ def _parse_trials(
                 "why_stopped": status.get("whyStopped"),
                 "phases": design.get("phases", []),
                 "enrollment": design.get("enrollmentInfo"),
-                "conditions": protocol.get("conditionsModule", {}).get(
-                    "conditions", []
-                ),
                 "interventions": [
                     {
                         "name": item.get("name"),
                         "type": item.get("type"),
                         "other_names": item.get("otherNames", []),
                     }
-                    for item in interventions[:8]
+                    for item in interventions[:6]
                 ],
-                "brief_summary": protocol.get("descriptionModule", {}).get(
-                    "briefSummary"
-                ),
                 "has_results": bool(study.get("hasResults")),
                 "primary_outcomes": [
-                    {
-                        "title": outcome.get("title"),
-                        "description": outcome.get("description"),
-                    }
+                    outcome.get("title")
                     for outcome in outcomes
                     if outcome.get("type") == "PRIMARY"
-                ][:5],
+                ][:3],
                 "serious_adverse_events": _serious_events(study),
                 "target_relevance": relevance,
+                "phase_score": _trial_phase_score(study),
                 "url": f"https://clinicaltrials.gov/study/{nct_id}" if nct_id else None,
             }
         )
     trials.sort(
         key=lambda item: (
+            -(
+                item["target_relevance"]
+                + 6 * item["has_results"]
+                + 2 * item["phase_score"]
+            ),
             -item["target_relevance"],
             not item["has_results"],
+            -item["phase_score"],
             item["status"] or "",
         )
     )
@@ -495,7 +655,7 @@ def _parse_trials(
 
 def _query_trials(
     retriever: Retriever, target: str, max_trials: int
-) -> tuple[int | None, list[dict[str, Any]]]:
+) -> tuple[int | None, int, list[dict[str, Any]]]:
     data = _json_request(
         retriever,
         CTGOV_STUDIES,
@@ -507,11 +667,14 @@ def _query_trials(
         },
         "clinicaltrials-target-search",
     )
-    return data.get("totalCount"), _parse_trials(data, target, max_trials)
+    records = _parse_trials(data, target, 100)
+    return data.get("totalCount"), len(records), records[:max_trials]
 
 
-def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]:
-    return {
+def _telemetry(
+    retriever: Retriever, started: float, *, include_requests: bool = False
+) -> dict[str, Any]:
+    telemetry = {
         "elapsed_seconds": round(time.monotonic() - started, 3),
         "request_attempts": len(retriever.requests),
         "network_requests": len(retriever.requests),
@@ -520,59 +683,214 @@ def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]:
             request["status"] == 429 for request in retriever.requests
         ),
         "bytes_received": sum(request["bytes"] for request in retriever.requests),
-        "requests": retriever.requests,
     }
+    if include_requests:
+        telemetry["requests"] = retriever.requests
+    return telemetry
+
+
+def _compact_trial(trial: dict[str, Any]) -> dict[str, Any]:
+    """Project a parsed trial record onto the compact card used in the output."""
+    # Keyed by lowercased name: dicts keep insertion order, first spelling wins.
+    unique_names: dict[str, str] = {}
+    for item in trial.get("interventions", []):
+        label = " ".join(str(item.get("name") or "").split())
+        if label:
+            unique_names.setdefault(label.lower(), label)
+    outcome_titles = trial.get("primary_outcomes", [])[:2]
+    return {
+        "nct_id": trial.get("nct_id"),
+        "title": _clip(str(trial.get("title") or ""), 220),
+        "status": trial.get("status"),
+        "why_stopped": _clip(str(trial.get("why_stopped") or ""), 180) or None,
+        "phases": trial.get("phases", []),
+        "enrollment": trial.get("enrollment"),
+        "programs": list(unique_names.values())[:4],
+        "has_results": trial.get("has_results", False),
+        "primary_outcomes": [
+            _clip(str(title or ""), 180) for title in outcome_titles
+        ],
+        "serious_adverse_events": trial.get("serious_adverse_events", [])[:5],
+        "url": trial.get("url"),
+    }
+
+
+def _target_evidence(
+    retriever: Retriever, target: str, max_papers: int, max_trials: int
+) -> dict[str, Any]:
+    """Query trials, then PubMed, and assemble one target's evidence record."""
+    # Ask for up to 3x the visible cap (minimum 15) before trimming below.
+    trial_fetch_cap = max(15, 3 * max_trials)
+    registry_total, relevant_total, fetched_trials = _query_trials(
+        retriever, target, trial_fetch_cap
+    )
+    query_plan, papers, available_papers = _query_pubmed(retriever, target, max_papers)
+    by_class: dict[str, list[Any]] = {"human": [], "preclinical": [], "other": []}
+    for paper in papers:
+        label = paper["classification"]
+        if label in by_class:
+            by_class[label].append(paper)
+    shown_trials = [_compact_trial(trial) for trial in fetched_trials[:max_trials]]
+    return {
+        "ok": True,
+        "target": target,
+        "source_coverage": {
+            "pubmed_query_axes": list(query_plan),
+            "clinicaltrials_total_count": registry_total,
+        },
+        "papers": by_class,
+        "trials": {"relevant_count": relevant_total, "records": shown_trials},
+        "omitted": {
+            "papers": max(0, available_papers - len(papers)),
+            "trials": max(0, relevant_total - len(shown_trials)),
+        },
+    }
+
+
+def _encoded_size(output: dict[str, Any]) -> int:  # compact JSON length + newline
+    return 1 + len(json.dumps(output, ensure_ascii=True, separators=(",", ":")))
+
+
+def _drop_lowest_priority_card(output: dict[str, Any]) -> bool:
+    """Pop one card from the first group above its floor; False if none is."""
+    # (section, group, floor) in drop order; a group at its floor is left alone.
+    drop_order = (
+        ("papers", "other", 0),
+        ("trials", "records", 3),
+        ("papers", "preclinical", 1),
+        ("papers", "human", 2),
+        ("trials", "records", 1),
+        ("papers", "human", 1),
+    )
+    # Reversed so later targets give up cards before earlier ones.
+    candidates = [entry for entry in output["targets"] if entry.get("ok")][::-1]
+    for section, group, floor in drop_order:
+        for entry in candidates:
+            cards = entry[section][group]
+            if len(cards) > floor:
+                cards.pop()
+                entry["omitted"][section] += 1
+                return True
+    return False
+
+
+def _shrink_excerpts(output: dict[str, Any], limit: int) -> None:
+    """Re-clip each paper's result and safety excerpts via `_clip(..., limit)`."""
+    successful = (entry for entry in output["targets"] if entry.get("ok"))
+    for entry in successful:
+        for cards in entry["papers"].values():
+            for card in cards:
+                for key in ("result_excerpt", "safety_excerpt"):
+                    card[key] = _clip(str(card.get(key) or ""), limit)
+
+
+def _enforce_output_budget(
+    output: dict[str, Any], max_output_chars: int
+) -> dict[str, Any]:
+    """Best-effort trim: drop cards, then clip excerpts, to fit max_output_chars."""
+    budget = output["output_budget"] = {
+        "max_characters": max_output_chars,
+        # Placeholder as wide as any in-budget size, so writing the measured
+        # size at the end cannot add digits and push the output over the cap.
+        "actual_characters": max_output_chars,
+        "cards_omitted_for_budget": False,
+    }
+    for clip_first in (False, True):
+        if clip_first and _encoded_size(output) > max_output_chars:
+            _shrink_excerpts(output, 180)
+        while _encoded_size(output) > max_output_chars:
+            if not _drop_lowest_priority_card(output):
+                break
+            budget["cards_omitted_for_budget"] = True
+    for _ in range(3):
+        budget["actual_characters"] = _encoded_size(output)
+    return output
+
+
+def _targets(values: list[str]) -> list[str]:
+    """Collapse whitespace and drop blank or case-insensitive repeat targets."""
+    # casefold() key -> first spelling seen; dict order keeps first-seen order.
+    first_seen: dict[str, str] = {}
+    for raw in values:
+        cleaned = " ".join(raw.split())
+        if not cleaned:
+            continue
+        first_seen.setdefault(cleaned.casefold(), cleaned)
+    return list(first_seen.values())
 
 
 def parse_args() -> argparse.Namespace:
+    """Parse CLI flags, normalize targets, and reject out-of-range limits."""
     parser = argparse.ArgumentParser()
-    parser.add_argument("--target", required=True)
+    parser.add_argument("--target", action="append", required=True)
+    parser.add_argument("--mode", choices=("auto", "brief", "compare"), default="auto")
     parser.add_argument(
         "--questions", nargs="+", default=["biology", "programs", "safety"]
     )
     parser.add_argument("--separate-human-preclinical", action="store_true")
-    parser.add_argument("--max-papers", type=int, default=14)
-    parser.add_argument("--max-trials", type=int, default=20)
-    return parser.parse_args()
+    parser.add_argument("--max-papers", type=int)
+    parser.add_argument("--max-trials", type=int)
+    parser.add_argument("--max-output-chars", type=int, default=DEFAULT_OUTPUT_CHARS)
+    parser.add_argument("--debug-telemetry", action="store_true")
+    args = parser.parse_args()
+    args.target = _targets(args.target)
+    target_count = len(args.target)
+    if target_count == 0:
+        parser.error("at least one non-empty --target is required")
+    if target_count > MAX_TARGETS:
+        parser.error(f"at most {MAX_TARGETS} targets are supported per invocation")
+    if target_count > 1 and args.mode == "brief":
+        parser.error("--mode brief accepts exactly one target")
+    if args.max_output_chars < 5_000:
+        parser.error("--max-output-chars must be at least 5000")
+    for name, ceiling in (("papers", 30), ("trials", 50)):
+        cap = getattr(args, f"max_{name}")
+        if cap is not None and cap < 1:
+            parser.error(f"--max-{name} must be positive")
+        if cap is not None and cap > ceiling:
+            parser.error(f"--max-{name} cannot exceed {ceiling}")
+    return args
 
 
 def main() -> int:
+    """Print budgeted JSON evidence for all targets; return 0 if any succeeded."""
     args = parse_args()
     started = time.monotonic()
     retriever = Retriever()
-    try:
-        query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers)
-        total_trials, trials = _query_trials(retriever, args.target, args.max_trials)
-        grouped_papers = {
-            group: [paper for paper in papers if paper["classification"] == group]
-            for group in ("human", "preclinical", "other")
-        }
-        output = {
-            "ok": True,
-            "target": args.target,
-            "questions": args.questions,
-            "separate_human_preclinical": args.separate_human_preclinical,
-            "query_plan": query_plan,
-            "papers": grouped_papers,
-            "trials": {
-                "total_count": total_trials,
-                "records": trials,
-            },
-            "limitations": [
-                "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.",
-                "Paper classification is heuristic and should be checked during synthesis.",
-                "ClinicalTrials.gov event counts are not automatically treatment-attributed.",
-                "Current program status is limited to the retrieved registry snapshot.",
-            ],
-            "telemetry": _telemetry(retriever, started),
-        }
-    except Exception as exc:  # noqa: BLE001
-        output = {
-            "ok": False,
-            "target": args.target,
-            "error": {"type": type(exc).__name__, "message": str(exc)},
-            "telemetry": _telemetry(retriever, started),
-        }
+    # More than one target selects compare mode even when --mode is "auto".
+    mode = "compare" if args.mode == "compare" or len(args.target) > 1 else "brief"
+    # Unset limits fall back to mode defaults; compare mode uses smaller ones.
+    max_papers = args.max_papers or (8 if mode == "compare" else 10)
+    max_trials = args.max_trials or (5 if mode == "compare" else 8)
+    targets: list[dict[str, Any]] = []
+    for target in args.target:
+        try:
+            targets.append(_target_evidence(retriever, target, max_papers, max_trials))
+        except Exception as exc:  # noqa: BLE001
+            # Record the failure and keep going so other targets still report.
+            # Exception text can embed a request URL with api_key, so redact it.
+            error = {"type": type(exc).__name__, "message": _safe_error(exc)}
+            targets.append({"ok": False, "target": target, "error": error})
+    succeeded = sum(target["ok"] for target in targets)
+    output = {
+        "schema_version": 1,
+        "ok": succeeded > 0,
+        "partial": 0 < succeeded < len(targets),
+        "mode": mode,
+        "questions": args.questions,
+        "separate_human_preclinical": args.separate_human_preclinical,
+        "targets": targets,
+        "limitations": [
+            "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.",
+            "Paper classification is heuristic and should be checked during synthesis.",
+            "ClinicalTrials.gov event counts are not automatically treatment-attributed.",
+            "Current program status is limited to the retrieved registry snapshot.",
+        ],
+        "telemetry": _telemetry(
+            retriever, started, include_requests=args.debug_telemetry
+        ),
+    }
+    output = _enforce_output_budget(output, args.max_output_chars)
     json.dump(output, sys.stdout, separators=(",", ":"), ensure_ascii=True)
     sys.stdout.write("\n")
     return 0 if output["ok"] else 1
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
index a8f5d8d96..00fa6d446 100644
--- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
+++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py
@@ -1,10 +1,15 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import argparse
+import io
+import json
 import importlib.util
 import time
 import unittest
+from contextlib import redirect_stdout
 from pathlib import Path
+from unittest import mock
 
 SCRIPT_PATH = Path(__file__).with_name("research_target_evidence.py")
 SPEC = importlib.util.spec_from_file_location("research_target_evidence", SCRIPT_PATH)
@@ -20,9 +25,11 @@ def trial(
     official_title: str = "",
     summary: str = "",
     intervention: str = "",
+    phase: str = "PHASE1",
+    has_results: bool = False,
 ) -> dict:
     return {
-        "hasResults": False,
+        "hasResults": has_results,
         "protocolSection": {
             "identificationModule": {
                 "nctId": nct_id,
@@ -32,7 +39,7 @@ def trial(
             "statusModule": {"overallStatus": "COMPLETED"},
             "designModule": {
                 "studyType": "INTERVENTIONAL",
-                "phases": ["PHASE1"],
+                "phases": [phase],
                 "enrollmentInfo": {"count": 10, "type": "ACTUAL"},
             },
             "descriptionModule": {"briefSummary": summary},
@@ -49,6 +56,15 @@ def trial(
 
 
 class PaperClassificationTests(unittest.TestCase):
+    def test_safety_excerpt_requires_a_safety_term(self) -> None:
+        # Efficacy-only text: neither "adverse" nor "toxicity" appears in it.
+        efficacy_only = "The response rate was 42%. Median survival was 10 months."
+        excerpt = research_target_evidence._best_excerpt(
+            efficacy_only, ("adverse", "toxicity"), require_term=True
+        )
+
+        self.assertEqual(excerpt, "")
+
     def test_patient_derived_xenograft_is_preclinical(self) -> None:
         classification = research_target_evidence._classify_paper(
             "Target activity in patient-derived xenografts",
@@ -111,9 +127,43 @@ def test_summary_only_record_requires_a_known_program_alias(self) -> None:
             ["NCT00000001", "NCT00000002"],
         )
 
+    def test_result_bearing_mature_trial_ranks_first(self) -> None:
+        # Helper defaults apply here: phase "PHASE1" and has_results False.
+        early_without_results = trial(
+            "NCT00000001",
+            "TROP2 CAR-T study",
+            intervention="TROP2 CAR-T",
+        )
+        late_with_results = trial(
+            "NCT00000002",
+            "TROP2 phase 3 study",
+            intervention="TROP2 sacituzumab govitecan",
+            phase="PHASE3",
+            has_results=True,
+        )
+        # Input order puts the early-phase record first, so the assertion only
+        # passes if ranking moved the phase 3 record with results ahead of it.
+        ranked = research_target_evidence._parse_trials(
+            {"studies": [early_without_results, late_with_results]},
+            "TROP2",
+            10,
+        )
+
+        self.assertEqual(ranked[0]["nct_id"], "NCT00000002")
+
 
 class TelemetryTests(unittest.TestCase):
-    def test_telemetry_reports_request_metrics(self) -> None:
+    def test_api_key_is_redacted_from_errors(self) -> None:
+        # Only the api_key value is expected to change; the neighbouring
+        # query parameters on either side of it must come through untouched.
+        leaky_url = "https://example.test/path?x=1&api_key=secret&retmode=json"
+        redacted_url = "https://example.test/path?x=1&api_key=<redacted>&retmode=json"
+
+        message = research_target_evidence._safe_error(RuntimeError(leaky_url))
+
+        self.assertEqual(message, redacted_url)
+
+    def test_telemetry_is_compact_by_default(self) -> None:
         retriever = research_target_evidence.Retriever()
         retriever.requests = [
             {
@@ -130,18 +180,175 @@ def test_telemetry_reports_request_metrics(self) -> None:
 
         self.assertEqual(telemetry["network_requests"], 1)
         self.assertEqual(telemetry["bytes_received"], 100)
+        self.assertNotIn("requests", telemetry)
+
+        debug = research_target_evidence._telemetry(
+            retriever, time.monotonic(), include_requests=True
+        )
+        self.assertEqual(debug["requests"], retriever.requests)
+
+
+class MultiTargetTests(unittest.TestCase):  # target/paper/budget/main() helpers
+    def test_round_robin_ids_balances_query_groups(self) -> None:
+        self.assertEqual(
+            research_target_evidence._round_robin_ids(
+                {"biology": ["1", "2"], "safety": ["3", "2"]}, 3
+            ),
+            ["1", "3", "2"],  # "1" and "3" lead their groups; "2" follows
+        )
+
+    def test_targets_are_deduplicated_without_reordering(self) -> None:
         self.assertEqual(
-            set(telemetry),
+            research_target_evidence._targets([" GPC3 ", "CLDN18.2", "gpc3", "TROP2"]),
+            ["GPC3", "CLDN18.2", "TROP2"],  # "gpc3" repeats "GPC3" ignoring case
+        )
+
+    def test_pubmed_group_ranking_handles_ids_outside_candidate_bound(self) -> None:
+        def request(_retriever, _base_url, _params, label):
+            if label.startswith("pubmed-esearch"):
+                suffix = label.rsplit("-", 1)[-1]
+                ids = {
+                    "biology": ["1", "5"],
+                    "human": ["2", "6"],
+                    "modalities": ["3", "7"],
+                    "safety": ["4", "8"],
+                    "landmark": [str(value) for value in range(9, 111)],  # 102 ids
+                }[suffix]
+                return {"esearchresult": {"idlist": ids}}
+            return {
+                "result": {
+                    "1": {"title": "GPC3 biology"},
+                    "2": {"title": "GPC3 phase 1"},
+                    "3": {"title": "GPC3 antibody"},
+                    "4": {"title": "GPC3 safety"},
+                    "5": {"title": "GPC3 expression"},
+                }
+            }
+
+        retriever = mock.Mock()
+        retriever.get.return_value = b"<PubmedArticleSet />"  # no articles to parse
+        with mock.patch.object(
+            research_target_evidence, "_json_request", side_effect=request
+        ):
+            _, papers, available = research_target_evidence._query_pubmed(
+                retriever, "GPC3", 1
+            )
+
+        self.assertEqual(papers, [])
+        self.assertEqual(available, 110)  # 8 + 102 ids across the five groups
+
+    def test_paper_selection_preserves_program_and_evidence_classes(self) -> None:
+        papers = [
+            {
+                "pmid": "1",
+                "classification": "human",
+                "matched_queries": ["human"],
+            },
+            {
+                "pmid": "2",
+                "classification": "other",
+                "matched_queries": ["landmark"],
+            },
+            {
+                "pmid": "3",
+                "classification": "preclinical",
+                "matched_queries": ["biology"],
+            },
+            {
+                "pmid": "4",
+                "classification": "human",
+                "matched_queries": ["safety"],
+            },
             {
-                "elapsed_seconds",
-                "request_attempts",
-                "network_requests",
-                "retries",
-                "rate_limit_events",
-                "bytes_received",
-                "requests",
+                "pmid": "5",
+                "classification": "human",
+                "matched_queries": ["landmark"],
             },
+        ]
+
+        selected = research_target_evidence._select_paper_cards(papers, 5)
+
+        self.assertEqual(
+            {paper["pmid"] for paper in selected}, {"1", "2", "3", "4", "5"}
+        )
+        self.assertEqual([paper["pmid"] for paper in selected[:2]], ["2", "5"])
+
+    def test_output_budget_drops_cards_without_truncating_json(self) -> None:
+        paper = {
+            "pmid": "1",
+            "title": "A" * 200,
+            "classification": "human",
+            "matched_queries": ["landmark"],
+            "result_excerpt": "R" * 500,
+            "safety_excerpt": "S" * 500,
+            "url": "https://pubmed.ncbi.nlm.nih.gov/1/",
+        }
+        target = {
+            "ok": True,
+            "target": "GPC3",
+            "source_coverage": {},
+            "papers": {
+                "human": [dict(paper, pmid=str(index)) for index in range(8)],
+                "preclinical": [dict(paper, pmid=str(index)) for index in range(8, 12)],
+                "other": [dict(paper, pmid=str(index)) for index in range(12, 16)],
+            },
+            "trials": {"relevant_count": 0, "records": []},
+            "omitted": {"papers": 0, "trials": 0},
+        }
+        output = {"ok": True, "targets": [target], "telemetry": {}}
+
+        bounded = research_target_evidence._enforce_output_budget(output, 5_000)
+        encoded = json.dumps(bounded, separators=(",", ":"), ensure_ascii=True)
+
+        self.assertLessEqual(len(encoded) + 1, 5_000)  # + 1: trailing newline
+        self.assertTrue(bounded["output_budget"]["cards_omitted_for_budget"])
+        self.assertGreater(target["omitted"]["papers"], 0)
+
+    def test_main_uses_one_retriever_and_preserves_partial_success(self) -> None:
+        args = argparse.Namespace(
+            target=["GPC3", "BROKEN"],
+            mode="auto",
+            questions=["biology", "programs", "safety"],
+            separate_human_preclinical=True,
+            max_papers=None,
+            max_trials=None,
+            max_output_chars=10_000,
+            debug_telemetry=False,
         )
+        retrievers: list[object] = []
+
+        def target_evidence(retriever, target, max_papers, max_trials):
+            retrievers.append(retriever)
+            if target == "BROKEN":
+                raise RuntimeError("expected failure")
+            return {
+                "ok": True,
+                "target": target,
+                "source_coverage": {},
+                "papers": {"human": [], "preclinical": [], "other": []},
+                "trials": {"relevant_count": 0, "records": []},
+                "omitted": {"papers": 0, "trials": 0},
+            }
+
+        stdout = io.StringIO()
+        with (
+            mock.patch.object(
+                research_target_evidence, "parse_args", return_value=args
+            ),
+            mock.patch.object(
+                research_target_evidence,
+                "_target_evidence",
+                side_effect=target_evidence,
+            ),
+            redirect_stdout(stdout),
+        ):
+            exit_code = research_target_evidence.main()
+
+        payload = json.loads(stdout.getvalue())
+        self.assertEqual(exit_code, 0)  # one target succeeded
+        self.assertTrue(payload["partial"])
+        self.assertEqual([target["ok"] for target in payload["targets"]], [True, False])
+        self.assertIs(retrievers[0], retrievers[1])  # one shared Retriever
 
 
 if __name__ == "__main__":