From 5de8c321501b03d5a0d7f6de6b30898045c3c740 Mon Sep 17 00:00:00 2001 From: Pearson Date: Tue, 23 Jun 2026 07:33:52 -0400 Subject: [PATCH 1/3] Add bounded target evidence skill --- .../.codex-plugin/plugin.json | 2 +- plugins/life-science-research/README.md | 4 +- .../skills/research-router-skill/SKILL.md | 4 +- .../research-target-evidence-skill/SKILL.md | 44 ++ .../agents/openai.yaml | 4 + .../scripts/research_target_evidence.py | 642 ++++++++++++++++++ .../scripts/test_research_target_evidence.py | 132 ++++ 7 files changed, 829 insertions(+), 3 deletions(-) create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py create mode 100644 plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py diff --git a/plugins/life-science-research/.codex-plugin/plugin.json b/plugins/life-science-research/.codex-plugin/plugin.json index 40348aa1f..8fae0d7a0 100644 --- a/plugins/life-science-research/.codex-plugin/plugin.json +++ b/plugins/life-science-research/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "life-science-research", - "version": "1.0.3", + "version": "1.1.0", "description": "General life-sciences research workflows with query routing, evidence synthesis, and optional parallel subagent analysis across genetics, omics, biology, chemistry, structure, clinical evidence, and public dataset discovery.", "author": { "name": "OpenAI" diff --git a/plugins/life-science-research/README.md b/plugins/life-science-research/README.md index 9833b9817..a0495635f 100644 --- a/plugins/life-science-research/README.md +++ b/plugins/life-science-research/README.md @@ -45,7 +45,7 @@ This plugin is meant to support workflows like: ## Skill Families -The plugin currently 
bundles 50 skills. The most useful way to think about them is by research area rather than as a flat list. +The plugin currently bundles 51 skills. The most useful way to think about them is by research area rather than as a flat list. ### Human Genetics And Variant Evidence @@ -94,6 +94,7 @@ The plugin currently bundles 50 skills. The most useful way to think about them ### Clinical, Translational, And Disease Evidence +- `research-target-evidence-skill` - `clinicaltrials-skill` - `cbioportal-skill` - `civic-skill` @@ -165,6 +166,7 @@ Each subagent should receive a bounded objective and return concise findings, ca - `Map the most plausible causal genes at this inflammatory bowel disease locus and explain why.` - `Summarize known structure, ligand, and pathway information for EGFR.` - `Pull ClinicalTrials.gov, ChEMBL, and PharmGKB context for JAK inhibitors in alopecia areata.` +- `Use $research-target-evidence-skill to separate human and preclinical evidence for ROR1 biology, therapeutic programs, and safety.` - `Find metabolomics and proteomics resources relevant to MASLD and PPARG.` - `Interpret this variant using ClinVar, gnomAD, Ensembl, and cohort association evidence.` diff --git a/plugins/life-science-research/skills/research-router-skill/SKILL.md b/plugins/life-science-research/skills/research-router-skill/SKILL.md index 9aff213ef..217df06c3 100644 --- a/plugins/life-science-research/skills/research-router-skill/SKILL.md +++ b/plugins/life-science-research/skills/research-router-skill/SKILL.md @@ -68,6 +68,8 @@ Choose the smallest set of skills that can answer the question well. 
Examples: +- bounded target biology, program, and safety review: + `research-target-evidence-skill` - target or disease evidence review: `opentargets-skill`, `gwas-catalog-skill`, `gtex-eqtl-skill`, `human-protein-atlas-skill` - variant interpretation: @@ -79,7 +81,7 @@ Examples: - chemistry and pharmacology: `chembl-skill`, `bindingdb-skill`, `pubchem-pug-skill`, `pharmgkb-skill` - clinical and translational: - `clinicaltrials-skill`, `cbioportal-skill`, `civic-skill` + `research-target-evidence-skill`, `clinicaltrials-skill`, `cbioportal-skill`, `civic-skill` - literature and dataset discovery: `ncbi-entrez-skill`, `ncbi-pmc-skill`, `biorxiv-skill`, `biostudies-arrayexpress-skill`, `ncbi-datasets-skill` diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md new file mode 100644 index 000000000..bcea2b860 --- /dev/null +++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md @@ -0,0 +1,44 @@ +--- +name: research-target-evidence-skill +description: Produce a bounded, source-backed evidence brief for a biological target, covering biology, therapeutic programs, human safety, and preclinical evidence. Use when a user asks for a target assessment, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass. +--- + +## Research Target Evidence + +Use the bundled script exactly once for the requested target. Let it plan, +deduplicate, pace, cache, and batch requests across PubMed and +ClinicalTrials.gov. + +Do not decompose the request into additional source calls unless the script +returns `ok=false` or the user explicitly asks for a deeper follow-up. 
+ +## Execution + +Extract the target and requested evidence axes, then run: + +```bash +python scripts/research_target_evidence.py \ + --target "<target>" \ + --questions biology programs safety \ + --separate-human-preclinical +``` + +The script uses a six-hour response cache by default. Use `--cache-mode off` +when the user explicitly needs a fresh retrieval. + +## Synthesis + +- Lead with the target-level conclusion and the largest uncertainty. +- Separate human evidence from preclinical evidence. +- Cover only modalities supported by the returned papers or trial records. +- Link each PMID as `[PMID <PMID>](https://pubmed.ncbi.nlm.nih.gov/<PMID>/)`. +- Link each trial as `[<NCT_ID>](https://clinicaltrials.gov/study/<NCT_ID>)`. +- Distinguish observed human toxicity from preclinical or theoretical risk. +- Treat registry adverse-event counts as non-attributed unless the record says + otherwise. +- Preserve the returned limitations and current registry statuses. +- Keep the answer concise enough that the evidence hierarchy remains visible. + +The retrieval is bounded and relevance-ranked, not a systematic review. Check +the script's heuristic human/preclinical classification during synthesis. Do +not include retrieval telemetry in the user-facing brief unless requested. diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml new file mode 100644 index 000000000..06c26d113 --- /dev/null +++ b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Research Target Evidence" + short_description: "Bounded target biology and safety evidence" + default_prompt: "Use $research-target-evidence-skill to summarize target biology, therapeutic programs, and safety evidence."
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py new file mode 100644 index 000000000..531af9606 --- /dev/null +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py @@ -0,0 +1,642 @@ +#!/usr/bin/env python3 +"""Bounded target-evidence retrieval using PubMed and ClinicalTrials.gov.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import re +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +import xml.etree.ElementTree as ET +from collections import defaultdict +from pathlib import Path +from typing import Any + +EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" +CTGOV_STUDIES = "https://clinicaltrials.gov/api/v2/studies" +USER_AGENT = "research-target-evidence/0.1" + + +class Retriever: + def __init__(self, cache_dir: Path, cache_mode: str, ttl_seconds: int) -> None: + self.cache_dir = cache_dir + self.cache_mode = cache_mode + self.ttl_seconds = ttl_seconds + self.requests: list[dict[str, Any]] = [] + self.last_request_by_host: dict[str, float] = {} + if cache_mode != "off": + cache_dir.mkdir(parents=True, exist_ok=True) + + def _cache_paths(self, url: str) -> tuple[Path, Path]: + digest = hashlib.sha256(url.encode("utf-8")).hexdigest() + return self.cache_dir / f"{digest}.body", self.cache_dir / f"{digest}.json" + + def _read_cache(self, url: str) -> bytes | None: + if self.cache_mode not in {"read-only", "read-write"}: + return None + body_path, meta_path = self._cache_paths(url) + if not body_path.exists() or not meta_path.exists(): + return None + try: + meta = json.loads(meta_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + if time.time() - float(meta.get("saved_at", 0)) > self.ttl_seconds: 
+ return None + return body_path.read_bytes() + + def _write_cache(self, url: str, body: bytes) -> None: + if self.cache_mode != "read-write": + return + body_path, meta_path = self._cache_paths(url) + body_path.write_bytes(body) + meta_path.write_text( + json.dumps({"saved_at": time.time()}, sort_keys=True), + encoding="utf-8", + ) + + def _pace(self, host: str) -> None: + minimum_interval = 0.38 if host.endswith("ncbi.nlm.nih.gov") else 0.05 + elapsed = time.monotonic() - self.last_request_by_host.get(host, 0.0) + if elapsed < minimum_interval: + time.sleep(minimum_interval - elapsed) + + def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes: + query = urllib.parse.urlencode(params, doseq=True) + url = f"{base_url}?{query}" if query else base_url + cached = self._read_cache(url) + if cached is not None: + self.requests.append( + { + "label": label, + "host": urllib.parse.urlparse(url).netloc, + "elapsed_ms": 0, + "bytes": len(cached), + "status": 200, + "cache_hit": True, + "attempt": 0, + } + ) + return cached + + host = urllib.parse.urlparse(url).netloc + last_error: Exception | None = None + for attempt in range(1, 4): + self._pace(host) + started = time.monotonic() + status = 0 + body = b"" + try: + request = urllib.request.Request( + url, headers={"User-Agent": USER_AGENT} + ) + with urllib.request.urlopen(request, timeout=20) as response: + status = int(response.status) + body = response.read() + elapsed_ms = round((time.monotonic() - started) * 1000) + self.last_request_by_host[host] = time.monotonic() + self.requests.append( + { + "label": label, + "host": host, + "elapsed_ms": elapsed_ms, + "bytes": len(body), + "status": status, + "cache_hit": False, + "attempt": attempt, + } + ) + self._write_cache(url, body) + return body + except urllib.error.HTTPError as exc: + status = exc.code + last_error = exc + except (urllib.error.URLError, TimeoutError) as exc: + last_error = exc + elapsed_ms = round((time.monotonic() - started) * 1000) + 
self.last_request_by_host[host] = time.monotonic() + self.requests.append( + { + "label": label, + "host": host, + "elapsed_ms": elapsed_ms, + "bytes": len(body), + "status": status, + "cache_hit": False, + "attempt": attempt, + "error": str(last_error), + } + ) + if status not in {429, 500, 502, 503, 504} and status != 0: + break + time.sleep(0.8 * (2 ** (attempt - 1))) + raise RuntimeError(f"Request failed for {label}: {last_error}") + + +def _ncbi_params(params: dict[str, Any]) -> dict[str, Any]: + merged = dict(params) + api_key = os.environ.get("NCBI_API_KEY") or os.environ.get("NCBI_EUTILS_API_KEY") + if api_key: + merged["api_key"] = api_key + if os.environ.get("NCBI_TOOL"): + merged["tool"] = os.environ["NCBI_TOOL"] + if os.environ.get("NCBI_EMAIL"): + merged["email"] = os.environ["NCBI_EMAIL"] + return merged + + +def _json_request( + retriever: Retriever, base_url: str, params: dict[str, Any], label: str +) -> dict[str, Any]: + return json.loads(retriever.get(base_url, params, label).decode("utf-8")) + + +def _text(element: ET.Element | None) -> str: + if element is None: + return "" + return "".join(element.itertext()).strip() + + +def _year(article: ET.Element) -> str | None: + for path in ( + "Journal/JournalIssue/PubDate/Year", + "ArticleDate/Year", + "Journal/JournalIssue/PubDate/MedlineDate", + ): + value = article.findtext(path) + if value: + match = re.search(r"(?:19|20)\d{2}", value) + return match.group(0) if match else value[:20] + return None + + +def _classify_paper(title: str, abstract: str, publication_types: list[str]) -> str: + text = f"{title} {abstract} {' '.join(publication_types)}".lower() + human_text = text.replace("patient-derived", "") + clinical_publication = any( + "clinical trial" in publication_type.lower() + for publication_type in publication_types + ) + human_terms = ( + "phase 1", + "phase i", + "phase 2", + "phase ii", + "patients were", + "patients with", + "patients received", + "patients treated", + "participants", + 
"first-in-human", + "human cancer", + "human tissue", + ) + preclinical_terms = ( + "mouse", + "mice", + "murine", + "xenograft", + "cell line", + "in vitro", + "in vivo model", + "preclinical", + ) + human_score = (3 if clinical_publication else 0) + sum( + term in human_text for term in human_terms + ) + contextual_publication = any( + publication_type.lower() in {"review", "editorial", "comment"} + for publication_type in publication_types + ) + if contextual_publication and not clinical_publication: + human_score = 0 + preclinical_score = sum(term in text for term in preclinical_terms) + if preclinical_score > human_score: + return "preclinical" + if human_score: + return "human" + if preclinical_score: + return "preclinical" + return "other" + + +def _parse_pubmed_xml( + body: bytes, memberships: dict[str, list[str]] +) -> list[dict[str, Any]]: + root = ET.fromstring(body) + papers: list[dict[str, Any]] = [] + for node in root.findall("PubmedArticle"): + citation = node.find("MedlineCitation") + article = citation.find("Article") if citation is not None else None + if citation is None or article is None: + continue + pmid = citation.findtext("PMID") or "" + title = _text(article.find("ArticleTitle")) + abstract = " ".join( + _text(item) + for item in article.findall("Abstract/AbstractText") + if _text(item) + ) + publication_types = [ + _text(item) + for item in article.findall("PublicationTypeList/PublicationType") + ] + ids = { + item.attrib.get("IdType", ""): (item.text or "") + for item in node.findall("PubmedData/ArticleIdList/ArticleId") + } + papers.append( + { + "pmid": pmid, + "title": title, + "year": _year(article), + "journal": article.findtext("Journal/Title"), + "doi": ids.get("doi"), + "publication_types": publication_types, + "classification": _classify_paper(title, abstract, publication_types), + "matched_queries": memberships.get(pmid, []), + "abstract_excerpt": abstract[:1200], + "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/", + } + ) + 
return papers + + +def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]: + selected: list[str] = [] + seen: set[str] = set() + positions = defaultdict(int) + while len(selected) < limit: + added = False + for name, values in groups.items(): + position = positions[name] + while position < len(values) and values[position] in seen: + position += 1 + positions[name] = position + 1 + if position < len(values): + selected.append(values[position]) + seen.add(values[position]) + added = True + if len(selected) >= limit: + break + if not added: + break + return selected + + +def _query_pubmed( + retriever: Retriever, target: str, max_papers: int +) -> tuple[dict[str, str], list[dict[str, Any]]]: + query_plan = { + "biology": f"{target}[Title/Abstract] AND (biology[Title/Abstract] OR signaling[Title/Abstract] OR expression[Title/Abstract] OR function[Title/Abstract])", + "human": f'{target}[Title/Abstract] AND (clinical trial[Publication Type] OR "phase 1"[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])', + "modalities": f'{target}[Title/Abstract] AND (antibody[Title/Abstract] OR ADC[Title/Abstract] OR "CAR T"[Title/Abstract] OR "chimeric antigen receptor"[Title/Abstract])', + "safety": f'{target}[Title/Abstract] AND ("normal tissue"[Title/Abstract] OR toxicity[Title/Abstract] OR safety[Title/Abstract] OR adverse[Title/Abstract])', + } + groups: dict[str, list[str]] = {} + memberships: dict[str, list[str]] = defaultdict(list) + for name, term in query_plan.items(): + data = _json_request( + retriever, + f"{EUTILS_BASE}/esearch.fcgi", + _ncbi_params( + { + "db": "pubmed", + "term": term, + "retmode": "json", + "retmax": 12, + "sort": "relevance", + } + ), + f"pubmed-esearch-{name}", + ) + ids = data.get("esearchresult", {}).get("idlist", []) + groups[name] = ids + for pmid in ids: + memberships[pmid].append(name) + + candidate_ids = _round_robin_ids(groups, max(max_papers * 3, max_papers)) + if not candidate_ids: + return 
query_plan, [] + + summary = _json_request( + retriever, + f"{EUTILS_BASE}/esummary.fcgi", + _ncbi_params( + {"db": "pubmed", "id": ",".join(candidate_ids), "retmode": "json"} + ), + "pubmed-esummary-selected", + ) + result = summary.get("result", {}) + target_token = target.lower() + + def rank(pmid: str) -> tuple[int, int]: + title = str(result.get(pmid, {}).get("title") or "").lower() + title_score = 6 if target_token in title else 0 + evidence_score = sum( + term in title + for term in ( + "clinical", + "phase", + "patient", + "antibody", + "car-t", + "chimeric antigen receptor", + "antibody-drug conjugate", + "safety", + "normal tissue", + "expression", + ) + ) + return title_score + evidence_score + len( + memberships[pmid] + ), -candidate_ids.index(pmid) + + selected = sorted(candidate_ids, key=rank, reverse=True)[:max_papers] + xml_body = retriever.get( + f"{EUTILS_BASE}/efetch.fcgi", + _ncbi_params({"db": "pubmed", "id": ",".join(selected), "retmode": "xml"}), + "pubmed-efetch-selected", + ) + return query_plan, _parse_pubmed_xml(xml_body, memberships) + + +def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]: + module = study.get("resultsSection", {}).get("adverseEventsModule", {}) + events = module.get("seriousEvents", []) + summarized: list[dict[str, Any]] = [] + for event in events: + affected = sum( + int(stat.get("numAffected") or 0) for stat in event.get("stats", []) + ) + if affected: + summarized.append( + { + "term": event.get("term"), + "organ_system": event.get("organSystem"), + "num_affected": affected, + } + ) + summarized.sort(key=lambda item: item["num_affected"], reverse=True) + return summarized[:8] + + +def _trial_relevance(study: dict[str, Any], target: str) -> int: + protocol = study.get("protocolSection", {}) + identification = protocol.get("identificationModule", {}) + descriptions = protocol.get("descriptionModule", {}) + interventions = protocol.get("armsInterventionsModule", {}).get("interventions", []) + 
target_pattern = re.compile(rf"\b{re.escape(target)}\b", re.IGNORECASE) + title_text = " ".join( + str(identification.get(field) or "") + for field in ("briefTitle", "officialTitle") + ) + intervention_text = " ".join( + " ".join([str(item.get("name") or ""), *map(str, item.get("otherNames", []))]) + for item in interventions + ) + summary_text = " ".join( + str(descriptions.get(field) or "") + for field in ("briefSummary", "detailedDescription") + ) + condition_text = " ".join( + map(str, protocol.get("conditionsModule", {}).get("conditions", [])) + ) + title_match = bool(target_pattern.search(title_text)) + intervention_match = bool(target_pattern.search(intervention_text)) + summary_match = bool(target_pattern.search(summary_text)) + condition_match = bool(target_pattern.search(condition_text)) + if ( + summary_match + and not title_match + and not intervention_match + and not condition_match + and protocol.get("designModule", {}).get("studyType") != "INTERVENTIONAL" + ): + return 0 + return ( + 5 * title_match + 5 * intervention_match + 2 * summary_match + condition_match + ) + + +def _trial_program_tokens( + study: dict[str, Any], *, include_interventions: bool = True +) -> set[str]: + protocol = study.get("protocolSection", {}) + identification = protocol.get("identificationModule", {}) + interventions = protocol.get("armsInterventionsModule", {}).get("interventions", []) + values = [ + str(identification.get("briefTitle") or ""), + str(identification.get("officialTitle") or ""), + ] + if include_interventions: + values.extend(str(item.get("name") or "") for item in interventions) + values.extend( + str(alias) for item in interventions for alias in item.get("otherNames", []) + ) + text = " ".join(values).lower() + generic = { + "advanced", + "antibody", + "cancer", + "cells", + "clinical", + "combination", + "escalation", + "expansion", + "malignancies", + "patients", + "phase", + "solid", + "study", + "therapy", + "treatment", + "tumor", + "tumors", + } + 
return { + token + for token in re.findall(r"[a-z0-9][a-z0-9-]{4,}", text) + if token not in generic + } + + +def _parse_trials( + data: dict[str, Any], target: str, max_trials: int +) -> list[dict[str, Any]]: + trials: list[dict[str, Any]] = [] + ranked_studies = [ + (study, _trial_relevance(study, target)) for study in data.get("studies", []) + ] + direct_program_tokens = set().union( + *( + _trial_program_tokens(study, include_interventions=False) + for study, relevance in ranked_studies + if relevance >= 5 + ) + ) + for study, relevance in ranked_studies: + if relevance == 0 or ( + relevance == 2 + and not (_trial_program_tokens(study) & direct_program_tokens) + ): + continue + protocol = study.get("protocolSection", {}) + identification = protocol.get("identificationModule", {}) + status = protocol.get("statusModule", {}) + design = protocol.get("designModule", {}) + interventions = protocol.get("armsInterventionsModule", {}).get( + "interventions", [] + ) + nct_id = identification.get("nctId") + outcomes = ( + study.get("resultsSection", {}) + .get("outcomeMeasuresModule", {}) + .get("outcomeMeasures", []) + ) + trials.append( + { + "nct_id": nct_id, + "title": identification.get("briefTitle"), + "status": status.get("overallStatus"), + "why_stopped": status.get("whyStopped"), + "phases": design.get("phases", []), + "enrollment": design.get("enrollmentInfo"), + "conditions": protocol.get("conditionsModule", {}).get( + "conditions", [] + ), + "interventions": [ + { + "name": item.get("name"), + "type": item.get("type"), + "other_names": item.get("otherNames", []), + } + for item in interventions[:8] + ], + "brief_summary": protocol.get("descriptionModule", {}).get( + "briefSummary" + ), + "has_results": bool(study.get("hasResults")), + "primary_outcomes": [ + { + "title": outcome.get("title"), + "description": outcome.get("description"), + } + for outcome in outcomes + if outcome.get("type") == "PRIMARY" + ][:5], + "serious_adverse_events": 
_serious_events(study), + "target_relevance": relevance, + "url": f"https://clinicaltrials.gov/study/{nct_id}" if nct_id else None, + } + ) + trials.sort( + key=lambda item: ( + -item["target_relevance"], + not item["has_results"], + item["status"] or "", + ) + ) + return trials[:max_trials] + + +def _query_trials( + retriever: Retriever, target: str, max_trials: int +) -> tuple[int | None, list[dict[str, Any]]]: + data = _json_request( + retriever, + CTGOV_STUDIES, + { + "query.term": target, + "pageSize": 100, + "countTotal": "true", + "format": "json", + }, + "clinicaltrials-target-search", + ) + return data.get("totalCount"), _parse_trials(data, target, max_trials) + + +def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]: + network = [request for request in retriever.requests if not request["cache_hit"]] + return { + "elapsed_seconds": round(time.monotonic() - started, 3), + "request_attempts": len(retriever.requests), + "network_requests": len(network), + "cache_hits": sum(request["cache_hit"] for request in retriever.requests), + "retries": sum(request["attempt"] > 1 for request in retriever.requests), + "rate_limit_events": sum( + request["status"] == 429 for request in retriever.requests + ), + "bytes_received": sum(request["bytes"] for request in network), + "requests": retriever.requests, + } + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--target", required=True) + parser.add_argument( + "--questions", nargs="+", default=["biology", "programs", "safety"] + ) + parser.add_argument("--separate-human-preclinical", action="store_true") + parser.add_argument("--max-papers", type=int, default=14) + parser.add_argument("--max-trials", type=int, default=20) + parser.add_argument( + "--cache-mode", choices=["off", "read-only", "read-write"], default="read-write" + ) + parser.add_argument("--cache-ttl-seconds", type=int, default=21600) + return parser.parse_args() + + +def main() -> int: 
+ args = parse_args() + started = time.monotonic() + cache_dir = Path.home() / ".cache" / "codex" / "research-target-evidence" + retriever = Retriever(cache_dir, args.cache_mode, args.cache_ttl_seconds) + try: + query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers) + total_trials, trials = _query_trials(retriever, args.target, args.max_trials) + grouped_papers = { + group: [paper for paper in papers if paper["classification"] == group] + for group in ("human", "preclinical", "other") + } + output = { + "ok": True, + "target": args.target, + "questions": args.questions, + "separate_human_preclinical": args.separate_human_preclinical, + "query_plan": query_plan, + "papers": grouped_papers, + "trials": { + "total_count": total_trials, + "records": trials, + }, + "limitations": [ + "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.", + "Paper classification is heuristic and should be checked during synthesis.", + "ClinicalTrials.gov event counts are not automatically treatment-attributed.", + "Current program status is limited to the retrieved registry snapshot.", + ], + "telemetry": _telemetry(retriever, started), + } + except Exception as exc: # noqa: BLE001 + output = { + "ok": False, + "target": args.target, + "error": {"type": type(exc).__name__, "message": str(exc)}, + "telemetry": _telemetry(retriever, started), + } + json.dump(output, sys.stdout, separators=(",", ":"), ensure_ascii=True) + sys.stdout.write("\n") + return 0 if output["ok"] else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py new file mode 100644 index 000000000..43e335896 --- /dev/null +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py @@ 
-0,0 +1,132 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + +SCRIPT_PATH = Path(__file__).with_name("research_target_evidence.py") +SPEC = importlib.util.spec_from_file_location("research_target_evidence", SCRIPT_PATH) +assert SPEC and SPEC.loader +research_target_evidence = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(research_target_evidence) + + +def trial( + nct_id: str, + title: str, + *, + official_title: str = "", + summary: str = "", + intervention: str = "", +) -> dict: + return { + "hasResults": False, + "protocolSection": { + "identificationModule": { + "nctId": nct_id, + "briefTitle": title, + "officialTitle": official_title, + }, + "statusModule": {"overallStatus": "COMPLETED"}, + "designModule": { + "studyType": "INTERVENTIONAL", + "phases": ["PHASE1"], + "enrollmentInfo": {"count": 10, "type": "ACTUAL"}, + }, + "descriptionModule": {"briefSummary": summary}, + "conditionsModule": {"conditions": ["Cancer"]}, + "armsInterventionsModule": { + "interventions": ( + [{"name": intervention, "type": "DRUG", "otherNames": []}] + if intervention + else [] + ) + }, + }, + } + + +class PaperClassificationTests(unittest.TestCase): + def test_patient_derived_xenograft_is_preclinical(self) -> None: + classification = research_target_evidence._classify_paper( + "Target activity in patient-derived xenografts", + "The treatment reduced growth in mice and cell lines.", + ["Journal Article"], + ) + + self.assertEqual(classification, "preclinical") + + def test_phase_one_trial_is_human(self) -> None: + classification = research_target_evidence._classify_paper( + "Phase I target study", + "Patients with advanced cancer received treatment.", + ["Clinical Trial, Phase I"], + ) + + self.assertEqual(classification, "human") + + def test_review_is_context_not_direct_human_evidence(self) -> None: + classification = 
research_target_evidence._classify_paper( + "Target review", + "This review discusses phase I studies and patients with cancer.", + ["Review"], + ) + + self.assertEqual(classification, "other") + + +class TrialFilteringTests(unittest.TestCase): + def test_summary_only_record_requires_a_known_program_alias(self) -> None: + records = research_target_evidence._parse_trials( + { + "studies": [ + trial( + "NCT00000001", + "Study of Cirmtuzumab", + official_title="A ROR1-targeted antibody study", + intervention="Cirmtuzumab", + ), + trial( + "NCT00000002", + "Cirmtuzumab extension study", + summary="The antibody binds ROR1.", + intervention="Cirmtuzumab", + ), + trial( + "NCT00000003", + "Broad sequencing study", + summary="The panel includes ROR1 among many genes.", + intervention="Genome sequencing", + ), + ] + }, + "ROR1", + 10, + ) + + self.assertEqual( + [record["nct_id"] for record in records], + ["NCT00000001", "NCT00000002"], + ) + + +class CacheTests(unittest.TestCase): + def test_cache_metadata_does_not_persist_request_url(self) -> None: + with tempfile.TemporaryDirectory() as directory: + retriever = research_target_evidence.Retriever( + Path(directory), "read-write", 60 + ) + url = "https://example.test/data?api_key=secret" + + retriever._write_cache(url, b"payload") + + _, metadata_path = retriever._cache_paths(url) + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) + self.assertEqual(set(metadata), {"saved_at"}) + + +if __name__ == "__main__": + unittest.main() From b6861935b5891888da960c981c4ece3f28d6b27c Mon Sep 17 00:00:00 2001 From: Pearson Date: Tue, 23 Jun 2026 07:41:11 -0400 Subject: [PATCH 2/3] Remove target evidence cache --- .../research-target-evidence-skill/SKILL.md | 5 +- .../scripts/research_target_evidence.py | 68 ++----------------- .../scripts/test_research_target_evidence.py | 46 +++++++++---- 3 files changed, 36 insertions(+), 83 deletions(-) diff --git 
a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md index bcea2b860..a2ac620e6 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md +++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md @@ -6,7 +6,7 @@ description: Produce a bounded, source-backed evidence brief for a biological ta ## Research Target Evidence Use the bundled script exactly once for the requested target. Let it plan, -deduplicate, pace, cache, and batch requests across PubMed and +deduplicate, pace, and batch requests across PubMed and ClinicalTrials.gov. Do not decompose the request into additional source calls unless the script @@ -23,9 +23,6 @@ python scripts/research_target_evidence.py \ --separate-human-preclinical ``` -The script uses a six-hour response cache by default. Use `--cache-mode off` -when the user explicitly needs a fresh retrieval. - ## Synthesis - Lead with the target-level conclusion and the largest uncertainty. 
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py index 531af9606..87300cc33 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py @@ -4,7 +4,6 @@ from __future__ import annotations import argparse -import hashlib import json import os import re @@ -15,7 +14,6 @@ import urllib.request import xml.etree.ElementTree as ET from collections import defaultdict -from pathlib import Path from typing import Any EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" @@ -24,42 +22,9 @@ class Retriever: - def __init__(self, cache_dir: Path, cache_mode: str, ttl_seconds: int) -> None: - self.cache_dir = cache_dir - self.cache_mode = cache_mode - self.ttl_seconds = ttl_seconds + def __init__(self) -> None: self.requests: list[dict[str, Any]] = [] self.last_request_by_host: dict[str, float] = {} - if cache_mode != "off": - cache_dir.mkdir(parents=True, exist_ok=True) - - def _cache_paths(self, url: str) -> tuple[Path, Path]: - digest = hashlib.sha256(url.encode("utf-8")).hexdigest() - return self.cache_dir / f"{digest}.body", self.cache_dir / f"{digest}.json" - - def _read_cache(self, url: str) -> bytes | None: - if self.cache_mode not in {"read-only", "read-write"}: - return None - body_path, meta_path = self._cache_paths(url) - if not body_path.exists() or not meta_path.exists(): - return None - try: - meta = json.loads(meta_path.read_text(encoding="utf-8")) - except (OSError, json.JSONDecodeError): - return None - if time.time() - float(meta.get("saved_at", 0)) > self.ttl_seconds: - return None - return body_path.read_bytes() - - def _write_cache(self, url: str, body: bytes) -> None: - if self.cache_mode != "read-write": - return 
- body_path, meta_path = self._cache_paths(url) - body_path.write_bytes(body) - meta_path.write_text( - json.dumps({"saved_at": time.time()}, sort_keys=True), - encoding="utf-8", - ) def _pace(self, host: str) -> None: minimum_interval = 0.38 if host.endswith("ncbi.nlm.nih.gov") else 0.05 @@ -70,21 +35,6 @@ def _pace(self, host: str) -> None: def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes: query = urllib.parse.urlencode(params, doseq=True) url = f"{base_url}?{query}" if query else base_url - cached = self._read_cache(url) - if cached is not None: - self.requests.append( - { - "label": label, - "host": urllib.parse.urlparse(url).netloc, - "elapsed_ms": 0, - "bytes": len(cached), - "status": 200, - "cache_hit": True, - "attempt": 0, - } - ) - return cached - host = urllib.parse.urlparse(url).netloc last_error: Exception | None = None for attempt in range(1, 4): @@ -108,11 +58,9 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes: "elapsed_ms": elapsed_ms, "bytes": len(body), "status": status, - "cache_hit": False, "attempt": attempt, } ) - self._write_cache(url, body) return body except urllib.error.HTTPError as exc: status = exc.code @@ -128,7 +76,6 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes: "elapsed_ms": elapsed_ms, "bytes": len(body), "status": status, - "cache_hit": False, "attempt": attempt, "error": str(last_error), } @@ -564,17 +511,15 @@ def _query_trials( def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]: - network = [request for request in retriever.requests if not request["cache_hit"]] return { "elapsed_seconds": round(time.monotonic() - started, 3), "request_attempts": len(retriever.requests), - "network_requests": len(network), - "cache_hits": sum(request["cache_hit"] for request in retriever.requests), + "network_requests": len(retriever.requests), "retries": sum(request["attempt"] > 1 for request in retriever.requests), "rate_limit_events": sum( 
request["status"] == 429 for request in retriever.requests ), - "bytes_received": sum(request["bytes"] for request in network), + "bytes_received": sum(request["bytes"] for request in retriever.requests), "requests": retriever.requests, } @@ -588,18 +533,13 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--separate-human-preclinical", action="store_true") parser.add_argument("--max-papers", type=int, default=14) parser.add_argument("--max-trials", type=int, default=20) - parser.add_argument( - "--cache-mode", choices=["off", "read-only", "read-write"], default="read-write" - ) - parser.add_argument("--cache-ttl-seconds", type=int, default=21600) return parser.parse_args() def main() -> int: args = parse_args() started = time.monotonic() - cache_dir = Path.home() / ".cache" / "codex" / "research-target-evidence" - retriever = Retriever(cache_dir, args.cache_mode, args.cache_ttl_seconds) + retriever = Retriever() try: query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers) total_trials, trials = _query_trials(retriever, args.target, args.max_trials) diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py index 43e335896..a8f5d8d96 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py @@ -2,8 +2,7 @@ from __future__ import annotations import importlib.util -import json -import tempfile +import time import unittest from pathlib import Path @@ -113,19 +112,36 @@ def test_summary_only_record_requires_a_known_program_alias(self) -> None: ) -class CacheTests(unittest.TestCase): - def test_cache_metadata_does_not_persist_request_url(self) -> None: - with tempfile.TemporaryDirectory() 
as directory: - retriever = research_target_evidence.Retriever( - Path(directory), "read-write", 60 - ) - url = "https://example.test/data?api_key=secret" - - retriever._write_cache(url, b"payload") - - _, metadata_path = retriever._cache_paths(url) - metadata = json.loads(metadata_path.read_text(encoding="utf-8")) - self.assertEqual(set(metadata), {"saved_at"}) +class TelemetryTests(unittest.TestCase): + def test_telemetry_reports_request_metrics(self) -> None: + retriever = research_target_evidence.Retriever() + retriever.requests = [ + { + "label": "example", + "host": "example.test", + "elapsed_ms": 10, + "bytes": 100, + "status": 200, + "attempt": 1, + } + ] + + telemetry = research_target_evidence._telemetry(retriever, time.monotonic()) + + self.assertEqual(telemetry["network_requests"], 1) + self.assertEqual(telemetry["bytes_received"], 100) + self.assertEqual( + set(telemetry), + { + "elapsed_seconds", + "request_attempts", + "network_requests", + "retries", + "rate_limit_events", + "bytes_received", + "requests", + }, + ) if __name__ == "__main__": From e68755c1bb29dd450f7f81ce79b6a2df79c5da6c Mon Sep 17 00:00:00 2001 From: Pearson Date: Tue, 23 Jun 2026 11:26:55 -0400 Subject: [PATCH 3/3] Add multi-target evidence comparison --- .../research-target-evidence-skill/SKILL.md | 29 +- .../agents/openai.yaml | 4 +- .../scripts/research_target_evidence.py | 458 +++++++++++++++--- .../scripts/test_research_target_evidence.py | 229 ++++++++- 4 files changed, 633 insertions(+), 87 deletions(-) diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md index a2ac620e6..4de35d090 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md +++ b/plugins/life-science-research/skills/research-target-evidence-skill/SKILL.md @@ -1,12 +1,12 @@ --- name: research-target-evidence-skill -description: Produce a bounded, source-backed 
evidence brief for a biological target, covering biology, therapeutic programs, human safety, and preclinical evidence. Use when a user asks for a target assessment, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass. +description: Produce bounded, source-backed evidence briefs for one or more biological targets, covering biology, therapeutic programs, human safety, preclinical evidence, and cross-target comparison. Use when a user asks for a target assessment, target comparison, translational evidence, program history, modality comparison, or human-versus-preclinical safety review and wants a fast primary-source research pass. --- ## Research Target Evidence -Use the bundled script exactly once for the requested target. Let it plan, -deduplicate, pace, and batch requests across PubMed and +Use the bundled script exactly once for all requested targets. Let it plan, +deduplicate, globally pace, and batch requests across PubMed and ClinicalTrials.gov. Do not decompose the request into additional source calls unless the script @@ -14,7 +14,7 @@ returns `ok=false` or the user explicitly asks for a deeper follow-up. ## Execution -Extract the target and requested evidence axes, then run: +For one target, run: ```bash python scripts/research_target_evidence.py \ @@ -23,10 +23,29 @@ python scripts/research_target_evidence.py \ --separate-human-preclinical ``` +For a comparison, repeat `--target` in the same command: + +```bash +python scripts/research_target_evidence.py \ + --target "" \ + --target "" \ + --target "" \ + --mode compare \ + --questions biology programs safety \ + --separate-human-preclinical +``` + +Do not run one process per target. The shared process enforces global source +pacing, preserves landmark-program and evidence-class quotas, and emits a +single size-bounded JSON result. 
+ ## Synthesis - Lead with the target-level conclusion and the largest uncertainty. - Separate human evidence from preclinical evidence. +- For comparisons, use the same evidence axes for every target and finish with + a compact comparison of validation, selectivity, safety, modality maturity, + and uncertainty. - Cover only modalities supported by the returned papers or trial records. - Link each PMID as `[PMID ](https://pubmed.ncbi.nlm.nih.gov//)`. - Link each trial as `[](https://clinicaltrials.gov/study/)`. @@ -34,6 +53,8 @@ python scripts/research_target_evidence.py \ - Treat registry adverse-event counts as non-attributed unless the record says otherwise. - Preserve the returned limitations and current registry statuses. +- Report per-target errors or omitted evidence counts rather than silently + treating a partial result as complete. - Keep the answer concise enough that the evidence hierarchy remains visible. The retrieval is bounded and relevance-ranked, not a systematic review. Check diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml index 06c26d113..590c8c94b 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml +++ b/plugins/life-science-research/skills/research-target-evidence-skill/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Research Target Evidence" - short_description: "Bounded target biology and safety evidence" - default_prompt: "Use $research-target-evidence-skill to summarize target biology, therapeutic programs, and safety evidence." + short_description: "Bounded target evidence and comparison" + default_prompt: "Use $research-target-evidence-skill to assess or compare target biology, therapeutic programs, and safety evidence." 
diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py index 87300cc33..0191545b2 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/research_target_evidence.py @@ -18,7 +18,14 @@ EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" CTGOV_STUDIES = "https://clinicaltrials.gov/api/v2/studies" -USER_AGENT = "research-target-evidence/0.1" +USER_AGENT = "research-target-evidence/0.2" +MAX_TARGETS = 6 +DEFAULT_OUTPUT_CHARS = 30_000 + + +def _safe_error(error: Exception | None) -> str: + message = str(error or "unknown error") + return re.sub(r"([?&]api_key=)[^&\s]+", r"\1", message) class Retriever: @@ -77,13 +84,13 @@ def get(self, base_url: str, params: dict[str, Any], label: str) -> bytes: "bytes": len(body), "status": status, "attempt": attempt, - "error": str(last_error), + "error": _safe_error(last_error), } ) if status not in {429, 500, 502, 503, 504} and status != 0: break time.sleep(0.8 * (2 ** (attempt - 1))) - raise RuntimeError(f"Request failed for {label}: {last_error}") + raise RuntimeError(f"Request failed for {label}: {_safe_error(last_error)}") def _ncbi_params(params: dict[str, Any]) -> dict[str, Any]: @@ -123,6 +130,43 @@ def _year(article: ET.Element) -> str | None: return None +def _clip(text: str, limit: int) -> str: + normalized = " ".join(text.split()) + if len(normalized) <= limit: + return normalized + clipped = normalized[: max(0, limit - 1)].rsplit(" ", 1)[0] + return f"{clipped}..." 
if clipped else normalized[:limit] + + +def _best_excerpt( + abstract: str, + terms: tuple[str, ...], + limit: int = 350, + *, + require_term: bool = False, +) -> str: + sentences = [ + sentence.strip() + for sentence in re.split(r"(?<=[.!?])\s+", " ".join(abstract.split())) + if sentence.strip() + ] + if not sentences: + return "" + + def score(sentence: str) -> tuple[int, int, int]: + lowered = sentence.lower() + term_score = sum(term in lowered for term in terms) + numeric_score = int(bool(re.search(r"\b\d+(?:\.\d+)?%?\b", sentence))) + return term_score, numeric_score, -sentences.index(sentence) + + best = max(sentences, key=score) + if require_term and score(best)[0] == 0: + return "" + if score(best)[:2] == (0, 0): + best = sentences[0] + return _clip(best, limit) + + def _classify_paper(title: str, abstract: str, publication_types: list[str]) -> str: text = f"{title} {abstract} {' '.join(publication_types)}".lower() human_text = text.replace("patient-derived", "") @@ -205,16 +249,75 @@ def _parse_pubmed_xml( "year": _year(article), "journal": article.findtext("Journal/Title"), "doi": ids.get("doi"), - "publication_types": publication_types, "classification": _classify_paper(title, abstract, publication_types), "matched_queries": memberships.get(pmid, []), - "abstract_excerpt": abstract[:1200], + "result_excerpt": _best_excerpt( + abstract, + ( + "response", + "survival", + "progression", + "efficacy", + "randomized", + "patients", + "participants", + "objective", + "complete remission", + "partial remission", + "did not", + "no response", + ), + ), + "safety_excerpt": _best_excerpt( + abstract, + ( + "adverse", + "safety", + "toxicity", + "cytokine", + "neutropenia", + "diarrhea", + "nausea", + "vomiting", + "neuropathy", + "pneumonitis", + "death", + "grade 3", + "grade 4", + "tolerated", + ), + require_term=True, + ), "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/", } ) return papers +def _select_paper_cards( + papers: list[dict[str, Any]], max_papers: 
int +) -> list[dict[str, Any]]: + selected: list[dict[str, Any]] = [] + + def take(predicate: Any) -> None: + paper = next( + (item for item in papers if item not in selected and predicate(item)), None + ) + if paper is not None and len(selected) < max_papers: + selected.append(paper) + + take(lambda item: "landmark" in item["matched_queries"]) + take(lambda item: "landmark" in item["matched_queries"]) + take(lambda item: item["classification"] == "human") + take(lambda item: "safety" in item["matched_queries"]) + take(lambda item: item["classification"] == "preclinical") + take(lambda item: "biology" in item["matched_queries"]) + for paper in papers: + if paper not in selected and len(selected) < max_papers: + selected.append(paper) + return selected + + def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]: selected: list[str] = [] seen: set[str] = set() @@ -238,13 +341,16 @@ def _round_robin_ids(groups: dict[str, list[str]], limit: int) -> list[str]: def _query_pubmed( - retriever: Retriever, target: str, max_papers: int -) -> tuple[dict[str, str], list[dict[str, Any]]]: + retriever: Retriever, + target: str, + max_papers: int, +) -> tuple[dict[str, str], list[dict[str, Any]], int]: query_plan = { "biology": f"{target}[Title/Abstract] AND (biology[Title/Abstract] OR signaling[Title/Abstract] OR expression[Title/Abstract] OR function[Title/Abstract])", - "human": f'{target}[Title/Abstract] AND (clinical trial[Publication Type] OR "phase 1"[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])', + "human": f"{target}[Title/Abstract] AND (clinical trial[Publication Type] OR phase[Title/Abstract] OR randomized[Title/Abstract] OR patient[Title/Abstract] OR safety[Title/Abstract])", "modalities": f'{target}[Title/Abstract] AND (antibody[Title/Abstract] OR ADC[Title/Abstract] OR "CAR T"[Title/Abstract] OR "chimeric antigen receptor"[Title/Abstract])', "safety": f'{target}[Title/Abstract] AND ("normal tissue"[Title/Abstract] OR 
toxicity[Title/Abstract] OR safety[Title/Abstract] OR adverse[Title/Abstract])', + "landmark": f'{target}[Title/Abstract] AND (randomized[Title/Abstract] OR randomised[Title/Abstract] OR "phase 2"[Title/Abstract] OR "phase II"[Title/Abstract] OR "phase 3"[Title/Abstract] OR "phase III"[Title/Abstract] OR terminated[Title/Abstract] OR failed[Title/Abstract])', } groups: dict[str, list[str]] = {} memberships: dict[str, list[str]] = defaultdict(list) @@ -257,7 +363,7 @@ def _query_pubmed( "db": "pubmed", "term": term, "retmode": "json", - "retmax": 12, + "retmax": 30 if name == "landmark" else 12, "sort": "relevance", } ), @@ -268,9 +374,10 @@ def _query_pubmed( for pmid in ids: memberships[pmid].append(name) - candidate_ids = _round_robin_ids(groups, max(max_papers * 3, max_papers)) + available_count = len(memberships) + candidate_ids = _round_robin_ids(groups, max(max_papers * 10, 80)) if not candidate_ids: - return query_plan, [] + return query_plan, [], available_count summary = _json_request( retriever, @@ -282,6 +389,9 @@ def _query_pubmed( ) result = summary.get("result", {}) target_token = target.lower() + candidate_positions = { + pmid: position for position, pmid in enumerate(candidate_ids) + } def rank(pmid: str) -> tuple[int, int]: title = str(result.get(pmid, {}).get("title") or "").lower() @@ -303,15 +413,49 @@ def rank(pmid: str) -> tuple[int, int]: ) return title_score + evidence_score + len( memberships[pmid] - ), -candidate_ids.index(pmid) + ), -candidate_positions.get(pmid, len(candidate_ids)) - selected = sorted(candidate_ids, key=rank, reverse=True)[:max_papers] + def landmark_rank(pmid: str) -> tuple[int, int]: + title = str(result.get(pmid, {}).get("title") or "").lower() + evidence_score = sum( + weight * (term in title) + for term, weight in ( + ("randomized", 5), + ("randomised", 5), + ("placebo", 3), + ("phase iii", 3), + ("phase 3", 3), + ("phase ii", 2), + ("phase 2", 2), + ("terminated", 2), + ("failed", 2), + ) + ) + return 
evidence_score, -candidate_positions.get(pmid, len(candidate_ids)) + + ranked = sorted(candidate_ids, key=rank, reverse=True) + fetch_limit = min(len(ranked), max(max_papers * 2, max_papers)) + selected: list[str] = [] + for group_name in ("landmark", "human", "safety", "biology", "modalities"): + group_ranked = sorted( + groups.get(group_name, []), + key=landmark_rank if group_name == "landmark" else rank, + reverse=True, + ) + first = next((pmid for pmid in group_ranked if pmid not in selected), None) + if first: + selected.append(first) + for pmid in ranked: + if pmid not in selected and len(selected) < fetch_limit: + selected.append(pmid) + selected = selected[:fetch_limit] xml_body = retriever.get( f"{EUTILS_BASE}/efetch.fcgi", _ncbi_params({"db": "pubmed", "id": ",".join(selected), "retmode": "xml"}), "pubmed-efetch-selected", ) - return query_plan, _parse_pubmed_xml(xml_body, memberships) + papers = _parse_pubmed_xml(xml_body, memberships) + return query_plan, _select_paper_cards(papers, max_papers), available_count def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]: @@ -331,7 +475,7 @@ def _serious_events(study: dict[str, Any]) -> list[dict[str, Any]]: } ) summarized.sort(key=lambda item: item["num_affected"], reverse=True) - return summarized[:8] + return summarized[:5] def _trial_relevance(study: dict[str, Any], target: str) -> int: @@ -345,7 +489,13 @@ def _trial_relevance(study: dict[str, Any], target: str) -> int: for field in ("briefTitle", "officialTitle") ) intervention_text = " ".join( - " ".join([str(item.get("name") or ""), *map(str, item.get("otherNames", []))]) + " ".join( + [ + str(item.get("name") or ""), + *map(str, item.get("otherNames", [])), + str(item.get("description") or ""), + ] + ) for item in interventions ) summary_text = " ".join( @@ -414,6 +564,18 @@ def _trial_program_tokens( } +def _trial_phase_score(study: dict[str, Any]) -> int: + phases = study.get("protocolSection", {}).get("designModule", {}).get("phases", 
[]) + scores = { + "EARLY_PHASE1": 1, + "PHASE1": 1, + "PHASE2": 2, + "PHASE3": 3, + "PHASE4": 4, + } + return max((scores.get(phase, 0) for phase in phases), default=0) + + def _parse_trials( data: dict[str, Any], target: str, max_trials: int ) -> list[dict[str, Any]]: @@ -455,38 +617,36 @@ def _parse_trials( "why_stopped": status.get("whyStopped"), "phases": design.get("phases", []), "enrollment": design.get("enrollmentInfo"), - "conditions": protocol.get("conditionsModule", {}).get( - "conditions", [] - ), "interventions": [ { "name": item.get("name"), "type": item.get("type"), "other_names": item.get("otherNames", []), } - for item in interventions[:8] + for item in interventions[:6] ], - "brief_summary": protocol.get("descriptionModule", {}).get( - "briefSummary" - ), "has_results": bool(study.get("hasResults")), "primary_outcomes": [ - { - "title": outcome.get("title"), - "description": outcome.get("description"), - } + outcome.get("title") for outcome in outcomes if outcome.get("type") == "PRIMARY" - ][:5], + ][:3], "serious_adverse_events": _serious_events(study), "target_relevance": relevance, + "phase_score": _trial_phase_score(study), "url": f"https://clinicaltrials.gov/study/{nct_id}" if nct_id else None, } ) trials.sort( key=lambda item: ( + -( + item["target_relevance"] + + 6 * item["has_results"] + + 2 * item["phase_score"] + ), -item["target_relevance"], not item["has_results"], + -item["phase_score"], item["status"] or "", ) ) @@ -495,7 +655,7 @@ def _parse_trials( def _query_trials( retriever: Retriever, target: str, max_trials: int -) -> tuple[int | None, list[dict[str, Any]]]: +) -> tuple[int | None, int, list[dict[str, Any]]]: data = _json_request( retriever, CTGOV_STUDIES, @@ -507,11 +667,14 @@ def _query_trials( }, "clinicaltrials-target-search", ) - return data.get("totalCount"), _parse_trials(data, target, max_trials) + records = _parse_trials(data, target, 100) + return data.get("totalCount"), len(records), records[:max_trials] -def 
_telemetry(retriever: Retriever, started: float) -> dict[str, Any]: - return { +def _telemetry( + retriever: Retriever, started: float, *, include_requests: bool = False +) -> dict[str, Any]: + telemetry = { "elapsed_seconds": round(time.monotonic() - started, 3), "request_attempts": len(retriever.requests), "network_requests": len(retriever.requests), @@ -520,59 +683,214 @@ def _telemetry(retriever: Retriever, started: float) -> dict[str, Any]: request["status"] == 429 for request in retriever.requests ), "bytes_received": sum(request["bytes"] for request in retriever.requests), - "requests": retriever.requests, } + if include_requests: + telemetry["requests"] = retriever.requests + return telemetry + + +def _compact_trial(trial: dict[str, Any]) -> dict[str, Any]: + programs: list[str] = [] + seen: set[str] = set() + for intervention in trial.get("interventions", []): + name = " ".join(str(intervention.get("name") or "").split()) + if name and name.lower() not in seen: + programs.append(name) + seen.add(name.lower()) + return { + "nct_id": trial.get("nct_id"), + "title": _clip(str(trial.get("title") or ""), 220), + "status": trial.get("status"), + "why_stopped": _clip(str(trial.get("why_stopped") or ""), 180) or None, + "phases": trial.get("phases", []), + "enrollment": trial.get("enrollment"), + "programs": programs[:4], + "has_results": trial.get("has_results", False), + "primary_outcomes": [ + _clip(str(title or ""), 180) + for title in trial.get("primary_outcomes", [])[:2] + ], + "serious_adverse_events": trial.get("serious_adverse_events", [])[:5], + "url": trial.get("url"), + } + + +def _target_evidence( + retriever: Retriever, target: str, max_papers: int, max_trials: int +) -> dict[str, Any]: + discovery_limit = max(max_trials * 3, 15) + total_trials, relevant_trials, trial_records = _query_trials( + retriever, target, discovery_limit + ) + query_plan, papers, available_papers = _query_pubmed(retriever, target, max_papers) + grouped_papers = { + group: 
[paper for paper in papers if paper["classification"] == group] + for group in ("human", "preclinical", "other") + } + visible_trials = trial_records[:max_trials] + return { + "ok": True, + "target": target, + "source_coverage": { + "pubmed_query_axes": list(query_plan), + "clinicaltrials_total_count": total_trials, + }, + "papers": grouped_papers, + "trials": { + "relevant_count": relevant_trials, + "records": [_compact_trial(trial) for trial in visible_trials], + }, + "omitted": { + "papers": max(0, available_papers - len(papers)), + "trials": max(0, relevant_trials - len(visible_trials)), + }, + } + + +def _encoded_size(output: dict[str, Any]) -> int: + return len(json.dumps(output, separators=(",", ":"), ensure_ascii=True)) + 1 + + +def _drop_lowest_priority_card(output: dict[str, Any]) -> bool: + successful = [target for target in output["targets"] if target.get("ok")] + priorities = ( + ("paper", "other", 0), + ("trial", "records", 3), + ("paper", "preclinical", 1), + ("paper", "human", 2), + ("trial", "records", 1), + ("paper", "human", 1), + ) + for kind, group, minimum in priorities: + for target in reversed(successful): + records = ( + target["trials"][group] if kind == "trial" else target["papers"][group] + ) + if len(records) <= minimum: + continue + records.pop() + target["omitted"]["trials" if kind == "trial" else "papers"] += 1 + return True + return False + + +def _shrink_excerpts(output: dict[str, Any], limit: int) -> None: + for target in output["targets"]: + if not target.get("ok"): + continue + for group in target["papers"].values(): + for paper in group: + for field in ("result_excerpt", "safety_excerpt"): + paper[field] = _clip(str(paper.get(field) or ""), limit) + + +def _enforce_output_budget( + output: dict[str, Any], max_output_chars: int +) -> dict[str, Any]: + output["output_budget"] = { + "max_characters": max_output_chars, + "actual_characters": 0, + "cards_omitted_for_budget": False, + } + while _encoded_size(output) > 
max_output_chars: + if not _drop_lowest_priority_card(output): + break + output["output_budget"]["cards_omitted_for_budget"] = True + if _encoded_size(output) > max_output_chars: + _shrink_excerpts(output, 180) + while _encoded_size(output) > max_output_chars: + if not _drop_lowest_priority_card(output): + break + output["output_budget"]["cards_omitted_for_budget"] = True + for _ in range(3): + output["output_budget"]["actual_characters"] = _encoded_size(output) + return output + + +def _targets(values: list[str]) -> list[str]: + targets: list[str] = [] + seen: set[str] = set() + for value in values: + target = " ".join(value.split()) + normalized = target.casefold() + if target and normalized not in seen: + targets.append(target) + seen.add(normalized) + return targets def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() - parser.add_argument("--target", required=True) + parser.add_argument("--target", action="append", required=True) + parser.add_argument("--mode", choices=("auto", "brief", "compare"), default="auto") parser.add_argument( "--questions", nargs="+", default=["biology", "programs", "safety"] ) parser.add_argument("--separate-human-preclinical", action="store_true") - parser.add_argument("--max-papers", type=int, default=14) - parser.add_argument("--max-trials", type=int, default=20) - return parser.parse_args() + parser.add_argument("--max-papers", type=int) + parser.add_argument("--max-trials", type=int) + parser.add_argument("--max-output-chars", type=int, default=DEFAULT_OUTPUT_CHARS) + parser.add_argument("--debug-telemetry", action="store_true") + args = parser.parse_args() + args.target = _targets(args.target) + if not args.target: + parser.error("at least one non-empty --target is required") + if len(args.target) > MAX_TARGETS: + parser.error(f"at most {MAX_TARGETS} targets are supported per invocation") + if args.mode == "brief" and len(args.target) > 1: + parser.error("--mode brief accepts exactly one target") + if 
args.max_output_chars < 5_000: + parser.error("--max-output-chars must be at least 5000") + if args.max_papers is not None and args.max_papers < 1: + parser.error("--max-papers must be positive") + if args.max_papers is not None and args.max_papers > 30: + parser.error("--max-papers cannot exceed 30") + if args.max_trials is not None and args.max_trials < 1: + parser.error("--max-trials must be positive") + if args.max_trials is not None and args.max_trials > 50: + parser.error("--max-trials cannot exceed 50") + return args def main() -> int: args = parse_args() started = time.monotonic() retriever = Retriever() - try: - query_plan, papers = _query_pubmed(retriever, args.target, args.max_papers) - total_trials, trials = _query_trials(retriever, args.target, args.max_trials) - grouped_papers = { - group: [paper for paper in papers if paper["classification"] == group] - for group in ("human", "preclinical", "other") - } - output = { - "ok": True, - "target": args.target, - "questions": args.questions, - "separate_human_preclinical": args.separate_human_preclinical, - "query_plan": query_plan, - "papers": grouped_papers, - "trials": { - "total_count": total_trials, - "records": trials, - }, - "limitations": [ - "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.", - "Paper classification is heuristic and should be checked during synthesis.", - "ClinicalTrials.gov event counts are not automatically treatment-attributed.", - "Current program status is limited to the retrieved registry snapshot.", - ], - "telemetry": _telemetry(retriever, started), - } - except Exception as exc: # noqa: BLE001 - output = { - "ok": False, - "target": args.target, - "error": {"type": type(exc).__name__, "message": str(exc)}, - "telemetry": _telemetry(retriever, started), - } + mode = "compare" if args.mode == "compare" or len(args.target) > 1 else "brief" + max_papers = args.max_papers or (8 if mode == "compare" else 10) + max_trials = args.max_trials or (5 if 
mode == "compare" else 8) + targets: list[dict[str, Any]] = [] + for target in args.target: + try: + targets.append(_target_evidence(retriever, target, max_papers, max_trials)) + except Exception as exc: # noqa: BLE001 + targets.append( + { + "ok": False, + "target": target, + "error": {"type": type(exc).__name__, "message": str(exc)}, + } + ) + succeeded = sum(target["ok"] for target in targets) + output = { + "schema_version": 1, + "ok": succeeded > 0, + "partial": 0 < succeeded < len(targets), + "mode": mode, + "questions": args.questions, + "separate_human_preclinical": args.separate_human_preclinical, + "targets": targets, + "limitations": [ + "PubMed retrieval is relevance-ranked and bounded; it is not a systematic review.", + "Paper classification is heuristic and should be checked during synthesis.", + "ClinicalTrials.gov event counts are not automatically treatment-attributed.", + "Current program status is limited to the retrieved registry snapshot.", + ], + "telemetry": _telemetry( + retriever, started, include_requests=args.debug_telemetry + ), + } + output = _enforce_output_budget(output, args.max_output_chars) json.dump(output, sys.stdout, separators=(",", ":"), ensure_ascii=True) sys.stdout.write("\n") return 0 if output["ok"] else 1 diff --git a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py index a8f5d8d96..00fa6d446 100644 --- a/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py +++ b/plugins/life-science-research/skills/research-target-evidence-skill/scripts/test_research_target_evidence.py @@ -1,10 +1,15 @@ #!/usr/bin/env python3 from __future__ import annotations +import argparse +import io +import json import importlib.util import time import unittest +from contextlib import redirect_stdout from pathlib import Path 
+from unittest import mock SCRIPT_PATH = Path(__file__).with_name("research_target_evidence.py") SPEC = importlib.util.spec_from_file_location("research_target_evidence", SCRIPT_PATH) @@ -20,9 +25,11 @@ def trial( official_title: str = "", summary: str = "", intervention: str = "", + phase: str = "PHASE1", + has_results: bool = False, ) -> dict: return { - "hasResults": False, + "hasResults": has_results, "protocolSection": { "identificationModule": { "nctId": nct_id, @@ -32,7 +39,7 @@ def trial( "statusModule": {"overallStatus": "COMPLETED"}, "designModule": { "studyType": "INTERVENTIONAL", - "phases": ["PHASE1"], + "phases": [phase], "enrollmentInfo": {"count": 10, "type": "ACTUAL"}, }, "descriptionModule": {"briefSummary": summary}, @@ -49,6 +56,15 @@ def trial( class PaperClassificationTests(unittest.TestCase): + def test_safety_excerpt_requires_a_safety_term(self) -> None: + excerpt = research_target_evidence._best_excerpt( + "The response rate was 42%. Median survival was 10 months.", + ("adverse", "toxicity"), + require_term=True, + ) + + self.assertEqual(excerpt, "") + def test_patient_derived_xenograft_is_preclinical(self) -> None: classification = research_target_evidence._classify_paper( "Target activity in patient-derived xenografts", @@ -111,9 +127,43 @@ def test_summary_only_record_requires_a_known_program_alias(self) -> None: ["NCT00000001", "NCT00000002"], ) + def test_result_bearing_mature_trial_ranks_first(self) -> None: + records = research_target_evidence._parse_trials( + { + "studies": [ + trial( + "NCT00000001", + "TROP2 CAR-T study", + intervention="TROP2 CAR-T", + ), + trial( + "NCT00000002", + "TROP2 phase 3 study", + intervention="TROP2 sacituzumab govitecan", + phase="PHASE3", + has_results=True, + ), + ] + }, + "TROP2", + 10, + ) + + self.assertEqual(records[0]["nct_id"], "NCT00000002") + class TelemetryTests(unittest.TestCase): - def test_telemetry_reports_request_metrics(self) -> None: + def test_api_key_is_redacted_from_errors(self) 
-> None: + error = RuntimeError( + "https://example.test/path?x=1&api_key=secret&retmode=json" + ) + + self.assertEqual( + research_target_evidence._safe_error(error), + "https://example.test/path?x=1&api_key=&retmode=json", + ) + + def test_telemetry_is_compact_by_default(self) -> None: retriever = research_target_evidence.Retriever() retriever.requests = [ { @@ -130,18 +180,175 @@ def test_telemetry_reports_request_metrics(self) -> None: self.assertEqual(telemetry["network_requests"], 1) self.assertEqual(telemetry["bytes_received"], 100) + self.assertNotIn("requests", telemetry) + + debug = research_target_evidence._telemetry( + retriever, time.monotonic(), include_requests=True + ) + self.assertEqual(debug["requests"], retriever.requests) + + +class MultiTargetTests(unittest.TestCase): + def test_round_robin_ids_balances_query_groups(self) -> None: + self.assertEqual( + research_target_evidence._round_robin_ids( + {"biology": ["1", "2"], "safety": ["3", "2"]}, 3 + ), + ["1", "3", "2"], + ) + + def test_targets_are_deduplicated_without_reordering(self) -> None: self.assertEqual( - set(telemetry), + research_target_evidence._targets([" GPC3 ", "CLDN18.2", "gpc3", "TROP2"]), + ["GPC3", "CLDN18.2", "TROP2"], + ) + + def test_pubmed_group_ranking_handles_ids_outside_candidate_bound(self) -> None: + def request(_retriever, _base_url, _params, label): + if label.startswith("pubmed-esearch"): + suffix = label.rsplit("-", 1)[-1] + ids = { + "biology": ["1", "5"], + "human": ["2", "6"], + "modalities": ["3", "7"], + "safety": ["4", "8"], + "landmark": [str(value) for value in range(9, 111)], + }[suffix] + return {"esearchresult": {"idlist": ids}} + return { + "result": { + "1": {"title": "GPC3 biology"}, + "2": {"title": "GPC3 phase 1"}, + "3": {"title": "GPC3 antibody"}, + "4": {"title": "GPC3 safety"}, + "5": {"title": "GPC3 expression"}, + } + } + + retriever = mock.Mock() + retriever.get.return_value = b"" + with mock.patch.object( + research_target_evidence, 
"_json_request", side_effect=request + ): + _, papers, available = research_target_evidence._query_pubmed( + retriever, "GPC3", 1 + ) + + self.assertEqual(papers, []) + self.assertEqual(available, 110) + + def test_paper_selection_preserves_program_and_evidence_classes(self) -> None: + papers = [ + { + "pmid": "1", + "classification": "human", + "matched_queries": ["human"], + }, + { + "pmid": "2", + "classification": "other", + "matched_queries": ["landmark"], + }, + { + "pmid": "3", + "classification": "preclinical", + "matched_queries": ["biology"], + }, + { + "pmid": "4", + "classification": "human", + "matched_queries": ["safety"], + }, { - "elapsed_seconds", - "request_attempts", - "network_requests", - "retries", - "rate_limit_events", - "bytes_received", - "requests", + "pmid": "5", + "classification": "human", + "matched_queries": ["landmark"], }, + ] + + selected = research_target_evidence._select_paper_cards(papers, 5) + + self.assertEqual( + {paper["pmid"] for paper in selected}, {"1", "2", "3", "4", "5"} + ) + self.assertEqual([paper["pmid"] for paper in selected[:2]], ["2", "5"]) + + def test_output_budget_drops_cards_without_truncating_json(self) -> None: + paper = { + "pmid": "1", + "title": "A" * 200, + "classification": "human", + "matched_queries": ["landmark"], + "result_excerpt": "R" * 500, + "safety_excerpt": "S" * 500, + "url": "https://pubmed.ncbi.nlm.nih.gov/1/", + } + target = { + "ok": True, + "target": "GPC3", + "source_coverage": {}, + "papers": { + "human": [dict(paper, pmid=str(index)) for index in range(8)], + "preclinical": [dict(paper, pmid=str(index)) for index in range(8, 12)], + "other": [dict(paper, pmid=str(index)) for index in range(12, 16)], + }, + "trials": {"relevant_count": 0, "records": []}, + "omitted": {"papers": 0, "trials": 0}, + } + output = {"ok": True, "targets": [target], "telemetry": {}} + + bounded = research_target_evidence._enforce_output_budget(output, 5_000) + encoded = json.dumps(bounded, separators=(",", 
":"), ensure_ascii=True) + + self.assertLessEqual(len(encoded) + 1, 5_000) + self.assertTrue(bounded["output_budget"]["cards_omitted_for_budget"]) + self.assertGreater(target["omitted"]["papers"], 0) + + def test_main_uses_one_retriever_and_preserves_partial_success(self) -> None: + args = argparse.Namespace( + target=["GPC3", "BROKEN"], + mode="auto", + questions=["biology", "programs", "safety"], + separate_human_preclinical=True, + max_papers=None, + max_trials=None, + max_output_chars=10_000, + debug_telemetry=False, ) + retrievers: list[object] = [] + + def target_evidence(retriever, target, max_papers, max_trials): + retrievers.append(retriever) + if target == "BROKEN": + raise RuntimeError("expected failure") + return { + "ok": True, + "target": target, + "source_coverage": {}, + "papers": {"human": [], "preclinical": [], "other": []}, + "trials": {"relevant_count": 0, "records": []}, + "omitted": {"papers": 0, "trials": 0}, + } + + stdout = io.StringIO() + with ( + mock.patch.object( + research_target_evidence, "parse_args", return_value=args + ), + mock.patch.object( + research_target_evidence, + "_target_evidence", + side_effect=target_evidence, + ), + redirect_stdout(stdout), + ): + exit_code = research_target_evidence.main() + + payload = json.loads(stdout.getvalue()) + self.assertEqual(exit_code, 0) + self.assertTrue(payload["partial"]) + self.assertEqual([target["ok"] for target in payload["targets"]], [True, False]) + self.assertIs(retrievers[0], retrievers[1]) if __name__ == "__main__":