From 86277588d87c66db88e824fb8b7d062942b1c3ab Mon Sep 17 00:00:00 2001 From: "tho.nguyen" <91511523+haki203@users.noreply.github.com> Date: Sat, 23 May 2026 12:30:17 +0700 Subject: [PATCH] Add biological accession crosswalk guard --- .../README.md | 24 ++ biological-accession-crosswalk-guard/demo.js | 74 +++++ biological-accession-crosswalk-guard/index.js | 275 ++++++++++++++++++ .../package.json | 12 + .../render-video.js | 59 ++++ .../reports/accession-crosswalk-packet.json | 144 +++++++++ .../reports/accession-crosswalk-report.md | 40 +++ .../reports/demo.mp4 | Bin 0 -> 8372 bytes .../reports/summary.svg | 31 ++ .../sample-data.js | 59 ++++ biological-accession-crosswalk-guard/test.js | 155 ++++++++++ 11 files changed, 873 insertions(+) create mode 100644 biological-accession-crosswalk-guard/README.md create mode 100644 biological-accession-crosswalk-guard/demo.js create mode 100644 biological-accession-crosswalk-guard/index.js create mode 100644 biological-accession-crosswalk-guard/package.json create mode 100644 biological-accession-crosswalk-guard/render-video.js create mode 100644 biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json create mode 100644 biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md create mode 100644 biological-accession-crosswalk-guard/reports/demo.mp4 create mode 100644 biological-accession-crosswalk-guard/reports/summary.svg create mode 100644 biological-accession-crosswalk-guard/sample-data.js create mode 100644 biological-accession-crosswalk-guard/test.js diff --git a/biological-accession-crosswalk-guard/README.md b/biological-accession-crosswalk-guard/README.md new file mode 100644 index 00000000..47c32ff3 --- /dev/null +++ b/biological-accession-crosswalk-guard/README.md @@ -0,0 +1,24 @@ +# Biological Accession Crosswalk Guard + +Self-contained Scientific Knowledge Graph Integration slice for SCIBASE issue #17. 
+ +The guard validates canonical biological accession nodes before entity pages or graph recommendations are shown. It checks namespace-specific formats for NCBI Gene, UniProtKB, PubChem, and MeSH identifiers; deprecated aliases; taxon mismatch; DOI-backed evidence; low crosswalk confidence; duplicate canonical targets; and unsafe recommendation paths that depend on unresolved nodes. + +## Files + +- `index.js` - dependency-free evaluator and Markdown reviewer packet builder +- `sample-data.js` - synthetic biological knowledge graph nodes +- `test.js` - Node test coverage for hold, review, recommendation suppression, and approved paths +- `demo.js` - writes JSON, Markdown, and SVG reviewer artifacts under `reports/` +- `render-video.js` - creates a short MP4 demo artifact + +## Validation + +```bash +npm run check +npm test +npm run demo +npm run video +``` + +Synthetic data only. No private research objects, external ontology services, registry calls, recommendation services, credentials, network calls, payment data, or payout details are used. 
/**
 * Escape a value for safe inclusion in SVG/XML text content or a
 * double-quoted attribute value.
 *
 * The previous chain of `replaceAll` calls mapped every special character to
 * itself, so nothing was escaped and a graph id or node id containing an
 * ampersand, angle bracket or double quote would have produced malformed SVG.
 *
 * @param {*} value - Any value; it is coerced with `String(value)` first.
 * @returns {string} The text with the ampersand, less-than, greater-than and
 *   double-quote characters replaced by their named entities
 *   (amp, lt, gt, quot).
 */
function escapeXml(value) {
  // Entity names are kept in a table and assembled at runtime so that every
  // character is rewritten exactly once, in a single pass (no double escaping
  // of the ampersand that an entity itself introduces).
  const entityNames = {
    '&': 'amp',
    '<': 'lt',
    '>': 'gt',
    '"': 'quot',
  };
  return String(value).replace(/[&<>"]/g, (character) => `&${entityNames[character]};`);
}
/**
 * Accession syntax accepted for each supported namespace.
 *
 * - UniProtKB accepts both the 6-character and the 10-character accession
 *   forms (the second alternative may repeat its 4-character tail once).
 * - MeSH accepts descriptor unique IDs with either 6 or 9 digits.
 *
 * None of the patterns use the `g`/`y` flags, so `.test()` is stateless.
 */
const FORMAT_PATTERNS = Object.freeze({
  NCBIGene: /^\d+$/,
  UniProtKB: /^(?:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2})$/,
  PubChem: /^\d+$/,
  MeSH: /^D(?:\d{6}|\d{9})$/,
});

/** Finding types that force the `hold-for-curation` decision. */
const CRITICAL_FINDINGS = new Set([
  'invalid-accession-format',
  'duplicate-canonical-target',
]);

/** Crosswalk confidence below this value is reported as a finding. */
const MIN_CROSSWALK_CONFIDENCE = 0.8;

/** Curator-facing instruction for each action code emitted by the guard. */
const ACTION_MESSAGES = Object.freeze({
  fix_accession_namespace: 'Correct the accession namespace or replace the node with a valid identifier.',
  merge_duplicate_canonical_nodes: 'Merge duplicate canonical nodes and preserve edge provenance.',
  replace_deprecated_alias: 'Replace deprecated aliases with the current canonical accession.',
  resolve_taxon_crosswalk: 'Resolve taxon mismatch before recommendations use this node.',
  attach_doi_evidence: 'Attach DOI-backed evidence for the crosswalk edge.',
  raise_crosswalk_confidence: 'Add stronger evidence or curator approval to raise crosswalk confidence.',
  suppress_unsafe_recommendation: 'Suppress recommendations using unresolved crosswalk nodes.',
});

/**
 * Return `value` when it is an array, otherwise an empty array.
 *
 * @param {*} value
 * @returns {Array}
 */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Append a finding to `findings`, defaulting its severity to `major`.
 *
 * `severity` is always the first key of the stored object so the serialized
 * output (and therefore the audit digest) keeps a stable key order.
 *
 * @param {object[]} findings - Accumulator that is mutated in place.
 * @param {object} finding - Finding fields; `severity` is optional.
 */
function addFinding(findings, finding) {
  const entry = {severity: 'major', ...finding};
  // The spread above overwrites the default when the caller passes an
  // explicit `severity: undefined` (or another falsy value); restore it.
  if (!entry.severity) entry.severity = 'major';
  findings.push(entry);
}

/**
 * Check a node's accession against the pattern registered for its namespace.
 *
 * @param {object} node - Graph node with `namespace` and `accession`.
 * @returns {boolean} `false` for unknown namespaces or non-matching accessions.
 */
function isValidFormat(node) {
  const {namespace, accession} = node;
  // Own-property check: a plain `FORMAT_PATTERNS[namespace]` lookup would
  // resolve inherited keys such as "constructor" and then fail on `.test`.
  if (typeof namespace !== 'string' || !Object.hasOwn(FORMAT_PATTERNS, namespace)) {
    return false;
  }
  return FORMAT_PATTERNS[namespace].test(String(accession || ''));
}

/**
 * Run the per-node checks (format, deprecated alias, taxon, DOI evidence,
 * confidence) and append one finding per failed check.
 *
 * @param {object} node - Graph node under review.
 * @param {object[]} findings - Accumulator that is mutated in place.
 */
function evaluateNode(node, findings) {
  const hasValidFormat = isValidFormat(node);
  if (!hasValidFormat) {
    addFinding(findings, {
      type: 'invalid-accession-format',
      severity: 'critical',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      message: `${node.namespace || 'unknown namespace'} accession ${node.accession || ''} does not match the namespace format.`,
      action: 'fix_accession_namespace',
    });
  }

  if (node.deprecatedAlias) {
    addFinding(findings, {
      type: 'deprecated-accession-alias',
      severity: 'major',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      canonicalTarget: node.canonicalTarget,
      message: 'Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.',
      action: 'replace_deprecated_alias',
    });
  }

  // Only compared when both sides are present; values are compared as strings.
  if (node.expectedTaxon && node.taxon && String(node.expectedTaxon) !== String(node.taxon)) {
    addFinding(findings, {
      type: 'taxon-mismatch',
      severity: 'major',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      expectedTaxon: node.expectedTaxon,
      taxon: node.taxon,
      message: `Taxon ${node.taxon} does not match expected taxon ${node.expectedTaxon}.`,
      action: 'resolve_taxon_crosswalk',
    });
  }

  if (asArray(node.evidenceDois).length === 0) {
    addFinding(findings, {
      type: 'missing-doi-evidence',
      severity: 'major',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      message: 'Crosswalk lacks DOI-backed evidence for the entity-page edge.',
      action: 'attach_doi_evidence',
    });
  }

  // Missing, non-numeric or non-finite confidence counts as 0. Previously a
  // value such as 'high' became NaN, and `NaN < 0.8` is false, so the node
  // silently passed the threshold.
  const parsedConfidence = Number(node.crosswalkConfidence);
  const confidence = Number.isFinite(parsedConfidence) ? parsedConfidence : 0;
  // Confidence is only reported for well-formed accessions; a malformed one
  // already carries a critical finding.
  if (hasValidFormat && confidence < MIN_CROSSWALK_CONFIDENCE) {
    addFinding(findings, {
      type: 'low-crosswalk-confidence',
      severity: 'major',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      confidence,
      message: 'Crosswalk confidence is below the safe recommendation threshold.',
      action: 'raise_crosswalk_confidence',
    });
  }
}

/**
 * Report every canonical target that two or more nodes resolve to.
 *
 * The finding's `nodeId` is the comma-joined list of the colliding node ids,
 * in input order.
 *
 * @param {object[]} nodes
 * @param {object[]} findings - Accumulator that is mutated in place.
 */
function evaluateDuplicates(nodes, findings) {
  const byTarget = new Map();
  for (const node of nodes) {
    if (!node.canonicalTarget) continue;
    const list = byTarget.get(node.canonicalTarget) || [];
    list.push(node);
    byTarget.set(node.canonicalTarget, list);
  }

  for (const [canonicalTarget, list] of byTarget) {
    if (list.length < 2) continue;
    addFinding(findings, {
      type: 'duplicate-canonical-target',
      severity: 'critical',
      nodeId: list.map((node) => node.id).join(','),
      canonicalTarget,
      message: `Multiple graph nodes resolve to ${canonicalTarget}.`,
      action: 'merge_duplicate_canonical_nodes',
    });
  }
}

/**
 * Add a suppression finding for every recommendation node that already has an
 * unresolved finding.
 *
 * Duplicate-target findings store a comma-joined `nodeId`, which never equals
 * a single node's id, so nodes are also matched through the duplicated
 * canonical target. Without that, a recommendation node whose only problem
 * was a duplicate target was never suppressed.
 *
 * @param {object[]} nodes
 * @param {object[]} findings - Accumulator that is read, then appended to.
 */
function addRecommendationSuppressions(nodes, findings) {
  const riskyNodeIds = new Set();
  const duplicatedTargets = new Set();
  // Both sets are built before any suppression is appended, so suppressions
  // never feed back into the risk calculation.
  for (const finding of findings) {
    if (finding.nodeId) riskyNodeIds.add(finding.nodeId);
    if (finding.type === 'duplicate-canonical-target') {
      duplicatedTargets.add(finding.canonicalTarget);
    }
  }

  for (const node of nodes) {
    if (!node.usedInRecommendation) continue;
    const isRisky = riskyNodeIds.has(node.id) || duplicatedTargets.has(node.canonicalTarget);
    if (!isRisky) continue;
    addFinding(findings, {
      type: 'unsafe-recommendation-crosswalk',
      severity: 'major',
      nodeId: node.id,
      namespace: node.namespace,
      accession: node.accession,
      message: 'Recommendation path uses a node with unresolved crosswalk findings.',
      action: 'suppress_unsafe_recommendation',
    });
  }
}

/**
 * Count findings per category for the report header.
 *
 * @param {object[]} nodes
 * @param {object[]} findings
 * @returns {{nodeCount: number, formatIssues: number, duplicateTargets: number,
 *   aliasIssues: number, taxonIssues: number, evidenceIssues: number,
 *   recommendationSuppressions: number}}
 */
function summarize(nodes, findings) {
  // Single pass over the findings instead of one filter per category.
  const perType = new Map();
  for (const {type} of findings) {
    perType.set(type, (perType.get(type) || 0) + 1);
  }
  const count = (type) => perType.get(type) || 0;

  return {
    nodeCount: nodes.length,
    formatIssues: count('invalid-accession-format'),
    duplicateTargets: count('duplicate-canonical-target'),
    aliasIssues: count('deprecated-accession-alias'),
    taxonIssues: count('taxon-mismatch'),
    // Missing DOIs and low confidence are both reported as evidence issues.
    evidenceIssues: count('missing-doi-evidence') + count('low-crosswalk-confidence'),
    recommendationSuppressions: count('unsafe-recommendation-crosswalk'),
  };
}

/**
 * Map the findings to a publication decision.
 *
 * @param {object[]} findings
 * @returns {'hold-for-curation'|'needs-curator-review'|'approved'}
 */
function chooseDecision(findings) {
  if (findings.some((finding) => CRITICAL_FINDINGS.has(finding.type))) {
    return 'hold-for-curation';
  }
  if (findings.length > 0) {
    return 'needs-curator-review';
  }
  return 'approved';
}

/**
 * Start at 100 and subtract 25 per critical finding, 15 per major finding and
 * 8 for any other severity; the result is floored at 0.
 *
 * @param {object[]} findings
 * @returns {number}
 */
function calculateReadinessScore(findings) {
  const score = findings.reduce((total, finding) => {
    if (finding.severity === 'critical') return total - 25;
    if (finding.severity === 'major') return total - 15;
    return total - 8;
  }, 100);
  return Math.max(0, score);
}

/**
 * Curator instruction for a finding's action code, falling back to the
 * finding's own message for unknown codes.
 *
 * @param {object} finding
 * @returns {string}
 */
function actionMessage(finding) {
  return Object.hasOwn(ACTION_MESSAGES, finding.action)
    ? ACTION_MESSAGES[finding.action]
    : finding.message;
}

/**
 * Turn findings into a de-duplicated action list, keeping first-seen order.
 *
 * @param {object[]} findings
 * @returns {{type: string, nodeId: string, findingType: string, message: string}[]}
 */
function buildRequiredActions(findings) {
  const seen = new Set();
  const actions = [];
  for (const finding of findings) {
    const key = `${finding.action}:${finding.nodeId}:${finding.type}`;
    if (seen.has(key)) continue;
    seen.add(key);
    actions.push({
      type: finding.action,
      nodeId: finding.nodeId,
      findingType: finding.type,
      message: actionMessage(finding),
    });
  }
  return actions;
}

/**
 * First 16 hex characters of the SHA-256 of `JSON.stringify(input)`.
 *
 * @param {*} input - JSON-serializable value.
 * @returns {string}
 */
function buildDigest(input) {
  return crypto.createHash('sha256').update(JSON.stringify(input)).digest('hex').slice(0, 16);
}

/**
 * Evaluate a knowledge-graph packet and produce the guard result.
 *
 * @param {{graphId?: string, generatedAt?: string, nodes?: object[]}} packet
 *   A missing packet is treated as an empty one. `null`/`undefined` entries in
 *   `nodes` are treated as empty nodes, which fail the format check, instead
 *   of crashing the evaluation.
 * @returns {{graphId: string, generatedAt: string, decision: string,
 *   readinessScore: number, summary: object, findings: object[],
 *   requiredActions: object[], auditDigest: string}}
 */
function evaluateBiologicalAccessionCrosswalk(packet) {
  const source = packet ?? {};
  const nodes = asArray(source.nodes).map((node) => node ?? {});
  const findings = [];
  for (const node of nodes) {
    evaluateNode(node, findings);
  }
  evaluateDuplicates(nodes, findings);
  // Must run last: it reads the findings produced by the checks above.
  addRecommendationSuppressions(nodes, findings);

  const summary = summarize(nodes, findings);
  const decision = chooseDecision(findings);
  const readinessScore = decision === 'approved' ? 100 : calculateReadinessScore(findings);

  return {
    graphId: source.graphId || 'unknown-graph',
    generatedAt: source.generatedAt || new Date().toISOString(),
    decision,
    readinessScore,
    summary,
    findings,
    requiredActions: buildRequiredActions(findings),
    // generatedAt is deliberately not part of the digest input.
    auditDigest: buildDigest({nodes, findings, summary, decision}),
  };
}

/**
 * Render an evaluation result as a Markdown reviewer report.
 *
 * @param {object} result - Value returned by
 *   `evaluateBiologicalAccessionCrosswalk`.
 * @returns {string} Markdown text ending in a newline.
 */
function buildReviewerPacket(result) {
  const lines = [
    '# Biological Accession Crosswalk Guard Report',
    '',
    `Graph: ${result.graphId}`,
    `Generated: ${result.generatedAt}`,
    `Decision: ${result.decision}`,
    `Readiness score: ${result.readinessScore}`,
    `Findings: ${result.findings.length}`,
    `Audit digest: ${result.auditDigest}`,
    '',
    '## Summary',
    '',
    `- Nodes reviewed: ${result.summary.nodeCount}`,
    `- Format issues: ${result.summary.formatIssues}`,
    `- Duplicate targets: ${result.summary.duplicateTargets}`,
    `- Alias issues: ${result.summary.aliasIssues}`,
    `- Taxon issues: ${result.summary.taxonIssues}`,
    `- Evidence issues: ${result.summary.evidenceIssues}`,
    `- Recommendation suppressions: ${result.summary.recommendationSuppressions}`,
    '',
    '## Findings',
    '',
  ];

  if (result.findings.length === 0) {
    lines.push('- No biological accession crosswalk findings.');
  } else {
    for (const finding of result.findings) {
      // Results built elsewhere may lack a severity; render them as MAJOR.
      const severity = String(finding.severity || 'major').toUpperCase();
      lines.push(`- ${severity} ${finding.type} for ${finding.nodeId}: ${finding.message}`);
    }
  }

  lines.push('', '## Required Actions', '');
  if (result.requiredActions.length === 0) {
    lines.push('- No curator action required.');
  } else {
    for (const action of result.requiredActions) {
      lines.push(`- ${action.type} (${action.nodeId}): ${action.message}`);
    }
  }

  return `${lines.join('\n')}\n`;
}
/**
 * Pick the ffmpeg executable to run.
 *
 * Resolution order: the `FFMPEG_PATH` environment variable, then an
 * `ffmpeg-static` binary under a `node_modules` directory three levels above
 * this script, then plain `ffmpeg` (resolved through PATH by the OS).
 *
 * The bundled-binary file name was hard-coded to `ffmpeg.exe`, so the
 * candidate was never found on Linux or macOS; the name now follows the
 * platform. All inputs are injectable (with the previous values as defaults)
 * so the lookup can be exercised without touching the real file system.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment to read FFMPEG_PATH from.
 * @param {string} [options.platform=process.platform] - Node platform id.
 * @param {string} [options.baseDir=__dirname] - Directory the lookup starts from.
 * @param {(candidate: string) => boolean} [options.exists=fs.existsSync]
 * @returns {string} Path or command name to spawn.
 */
function resolveFfmpeg({
  env = process.env,
  platform = process.platform,
  baseDir = __dirname,
  exists = fs.existsSync,
} = {}) {
  if (env.FFMPEG_PATH) return env.FFMPEG_PATH;
  const binaryName = platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg';
  const candidate = path.resolve(baseDir, '..', '..', '..', 'node_modules', 'ffmpeg-static', binaryName);
  if (exists(candidate)) return candidate;
  return 'ffmpeg';
}

/**
 * Render `reports/demo.mp4`: a 4-second 1280x720 clip whose three bars are
 * sized from the sample packet's readiness score, finding count and required
 * action count.
 *
 * @throws {Error} When ffmpeg cannot be launched (the spawn error is attached
 *   as `cause`) or when it exits unsuccessfully.
 */
function main() {
  fs.mkdirSync(REPORT_DIR, {recursive: true});
  const result = evaluateBiologicalAccessionCrosswalk(samplePacket);
  const outPath = path.join(REPORT_DIR, 'demo.mp4');
  const ffmpeg = resolveFfmpeg();
  // Bar widths are clamped to the 24..820 px track drawn behind them.
  const scoreWidth = Math.max(24, Math.min(820, Math.round(result.readinessScore * 8.2)));
  const findingWidth = Math.max(24, Math.min(820, result.findings.length * 76));
  const actionWidth = Math.max(24, Math.min(820, result.requiredActions.length * 82));
  const filters = [
    'drawbox=x=52:y=52:w=1176:h=616:color=white@0.13:t=fill',
    'drawbox=x=76:y=76:w=1128:h=568:color=white@0.08:t=fill',
    'drawbox=x=110:y=168:w=820:h=44:color=white@0.28:t=fill',
    `drawbox=x=110:y=168:w=${scoreWidth}:h=44:color=0x2f855a@1:t=fill`,
    'drawbox=x=110:y=286:w=820:h=44:color=white@0.28:t=fill',
    `drawbox=x=110:y=286:w=${findingWidth}:h=44:color=0xc2410c@1:t=fill`,
    'drawbox=x=110:y=404:w=820:h=44:color=white@0.28:t=fill',
    `drawbox=x=110:y=404:w=${actionWidth}:h=44:color=0x1d4ed8@1:t=fill`,
    'drawbox=x=984:y=168:w=140:h=44:color=0x2f855a@1:t=fill',
    'drawbox=x=984:y=286:w=140:h=44:color=0xc2410c@1:t=fill',
    'drawbox=x=984:y=404:w=140:h=44:color=0x1d4ed8@1:t=fill',
    'drawbox=x=110:y=548:w=560:h=38:color=white@0.22:t=fill',
    'drawbox=x=110:y=548:w=470:h=38:color=0xf9ab00@1:t=fill',
  ].join(',');

  const resultProcess = spawnSync(ffmpeg, [
    '-y',
    '-f', 'lavfi',
    '-i', 'color=c=0x12333c:s=1280x720:d=4:r=25',
    '-vf', filters,
    '-c:v', 'libx264',
    '-pix_fmt', 'yuv420p',
    '-movflags', '+faststart',
    outPath,
  ], {stdio: 'inherit'});
  // spawnSync reports a failed launch (e.g. binary not found) through
  // `error` with a null status; surface that instead of "exited with null".
  if (resultProcess.error) {
    throw new Error(`Unable to launch ffmpeg (${ffmpeg})`, {cause: resultProcess.error});
  }
  if (resultProcess.status !== 0) {
    // A null status without an error means the process was killed by a signal.
    const outcome = resultProcess.status === null && resultProcess.signal
      ? `was terminated by ${resultProcess.signal}`
      : `exited with ${resultProcess.status}`;
    throw new Error(`ffmpeg ${outcome}`);
  }
  console.log(outPath);
}
b/biological-accession-crosswalk-guard/reports/accession-crosswalk-packet.json @@ -0,0 +1,144 @@ +{ + "graphId": "kg-biological-accession-demo", + "generatedAt": "2026-05-23T07:00:00Z", + "decision": "hold-for-curation", + "readinessScore": 0, + "summary": { + "nodeCount": 5, + "formatIssues": 1, + "duplicateTargets": 1, + "aliasIssues": 2, + "taxonIssues": 1, + "evidenceIssues": 2, + "recommendationSuppressions": 1 + }, + "findings": [ + { + "severity": "critical", + "type": "invalid-accession-format", + "nodeId": "node-gene-p53-alias", + "namespace": "NCBIGene", + "accession": "TP53", + "message": "NCBIGene accession TP53 does not match the namespace format.", + "action": "fix_accession_namespace" + }, + { + "severity": "major", + "type": "deprecated-accession-alias", + "nodeId": "node-gene-p53-alias", + "namespace": "NCBIGene", + "accession": "TP53", + "canonicalTarget": "NCBIGene:7157", + "message": "Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.", + "action": "replace_deprecated_alias" + }, + { + "severity": "major", + "type": "deprecated-accession-alias", + "nodeId": "node-protein-old", + "namespace": "UniProtKB", + "accession": "Q9Y261", + "canonicalTarget": "UniProtKB:Q9Y261", + "message": "Node uses a deprecated alias that should be replaced by the canonical accession before graph publication.", + "action": "replace_deprecated_alias" + }, + { + "severity": "major", + "type": "taxon-mismatch", + "nodeId": "node-protein-old", + "namespace": "UniProtKB", + "accession": "Q9Y261", + "expectedTaxon": "9606", + "taxon": "10090", + "message": "Taxon 10090 does not match expected taxon 9606.", + "action": "resolve_taxon_crosswalk" + }, + { + "severity": "major", + "type": "missing-doi-evidence", + "nodeId": "node-protein-old", + "namespace": "UniProtKB", + "accession": "Q9Y261", + "message": "Crosswalk lacks DOI-backed evidence for the entity-page edge.", + "action": "attach_doi_evidence" + }, + { + 
"severity": "major", + "type": "low-crosswalk-confidence", + "nodeId": "node-protein-old", + "namespace": "UniProtKB", + "accession": "Q9Y261", + "confidence": 0.61, + "message": "Crosswalk confidence is below the safe recommendation threshold.", + "action": "raise_crosswalk_confidence" + }, + { + "severity": "critical", + "type": "duplicate-canonical-target", + "nodeId": "node-gene-tp53,node-gene-p53-alias", + "canonicalTarget": "NCBIGene:7157", + "message": "Multiple graph nodes resolve to NCBIGene:7157.", + "action": "merge_duplicate_canonical_nodes" + }, + { + "severity": "major", + "type": "unsafe-recommendation-crosswalk", + "nodeId": "node-gene-p53-alias", + "namespace": "NCBIGene", + "accession": "TP53", + "message": "Recommendation path uses a node with unresolved crosswalk findings.", + "action": "suppress_unsafe_recommendation" + } + ], + "requiredActions": [ + { + "type": "fix_accession_namespace", + "nodeId": "node-gene-p53-alias", + "findingType": "invalid-accession-format", + "message": "Correct the accession namespace or replace the node with a valid identifier." + }, + { + "type": "replace_deprecated_alias", + "nodeId": "node-gene-p53-alias", + "findingType": "deprecated-accession-alias", + "message": "Replace deprecated aliases with the current canonical accession." + }, + { + "type": "replace_deprecated_alias", + "nodeId": "node-protein-old", + "findingType": "deprecated-accession-alias", + "message": "Replace deprecated aliases with the current canonical accession." + }, + { + "type": "resolve_taxon_crosswalk", + "nodeId": "node-protein-old", + "findingType": "taxon-mismatch", + "message": "Resolve taxon mismatch before recommendations use this node." + }, + { + "type": "attach_doi_evidence", + "nodeId": "node-protein-old", + "findingType": "missing-doi-evidence", + "message": "Attach DOI-backed evidence for the crosswalk edge." 
+ }, + { + "type": "raise_crosswalk_confidence", + "nodeId": "node-protein-old", + "findingType": "low-crosswalk-confidence", + "message": "Add stronger evidence or curator approval to raise crosswalk confidence." + }, + { + "type": "merge_duplicate_canonical_nodes", + "nodeId": "node-gene-tp53,node-gene-p53-alias", + "findingType": "duplicate-canonical-target", + "message": "Merge duplicate canonical nodes and preserve edge provenance." + }, + { + "type": "suppress_unsafe_recommendation", + "nodeId": "node-gene-p53-alias", + "findingType": "unsafe-recommendation-crosswalk", + "message": "Suppress recommendations using unresolved crosswalk nodes." + } + ], + "auditDigest": "08f1b33b690fc649" +} diff --git a/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md b/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md new file mode 100644 index 00000000..5b68885e --- /dev/null +++ b/biological-accession-crosswalk-guard/reports/accession-crosswalk-report.md @@ -0,0 +1,40 @@ +# Biological Accession Crosswalk Guard Report + +Graph: kg-biological-accession-demo +Generated: 2026-05-23T07:00:00Z +Decision: hold-for-curation +Readiness score: 0 +Findings: 8 +Audit digest: 08f1b33b690fc649 + +## Summary + +- Nodes reviewed: 5 +- Format issues: 1 +- Duplicate targets: 1 +- Alias issues: 2 +- Taxon issues: 1 +- Evidence issues: 2 +- Recommendation suppressions: 1 + +## Findings + +- CRITICAL invalid-accession-format for node-gene-p53-alias: NCBIGene accession TP53 does not match the namespace format. +- MAJOR deprecated-accession-alias for node-gene-p53-alias: Node uses a deprecated alias that should be replaced by the canonical accession before graph publication. +- MAJOR deprecated-accession-alias for node-protein-old: Node uses a deprecated alias that should be replaced by the canonical accession before graph publication. +- MAJOR taxon-mismatch for node-protein-old: Taxon 10090 does not match expected taxon 9606. 
+- MAJOR missing-doi-evidence for node-protein-old: Crosswalk lacks DOI-backed evidence for the entity-page edge. +- MAJOR low-crosswalk-confidence for node-protein-old: Crosswalk confidence is below the safe recommendation threshold. +- CRITICAL duplicate-canonical-target for node-gene-tp53,node-gene-p53-alias: Multiple graph nodes resolve to NCBIGene:7157. +- MAJOR unsafe-recommendation-crosswalk for node-gene-p53-alias: Recommendation path uses a node with unresolved crosswalk findings. + +## Required Actions + +- fix_accession_namespace (node-gene-p53-alias): Correct the accession namespace or replace the node with a valid identifier. +- replace_deprecated_alias (node-gene-p53-alias): Replace deprecated aliases with the current canonical accession. +- replace_deprecated_alias (node-protein-old): Replace deprecated aliases with the current canonical accession. +- resolve_taxon_crosswalk (node-protein-old): Resolve taxon mismatch before recommendations use this node. +- attach_doi_evidence (node-protein-old): Attach DOI-backed evidence for the crosswalk edge. +- raise_crosswalk_confidence (node-protein-old): Add stronger evidence or curator approval to raise crosswalk confidence. +- merge_duplicate_canonical_nodes (node-gene-tp53,node-gene-p53-alias): Merge duplicate canonical nodes and preserve edge provenance. +- suppress_unsafe_recommendation (node-gene-p53-alias): Suppress recommendations using unresolved crosswalk nodes. 
diff --git a/biological-accession-crosswalk-guard/reports/demo.mp4 b/biological-accession-crosswalk-guard/reports/demo.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..db53158372327b875487d6bdde69bf6f02971801 GIT binary patch literal 8372 zcmeHMd011&7QaapD2SlA6;ZB=ibx;{k*x-0Q`~SX;DUskQUz+d*XD*@-kHl0gOW`^0GExPk8dKuZbaK<8^IZRdg@nXk~I zL5)gxoBEF4Xxc$Qq6Zb)z?mTL&T?^MyRcatTrL&FbKJQGrv5#oy&Y-~kn*RAF#GoF zm~9l)S(hfy>3xD`jbRK7y%_j|58sZYVJ4Ego99O6{n65bw5`>IRt+@*f}Q~aO4H$G z_GAo04>m+g=~nCk653cF;5hJIKjJ>R^e?rftb($m|5`VHUU zk2n4?_yKK?|E2w5hUNpCUBBP$;OwCDfl9~O!QfmWnsxwq%k&(>3;NB@0Lg@B3_PPd z9ux!||Bu@)FUdcq6*sIB-j82(=nr z3eO8uYPi-$AEH#q;4l%PS4NuxgU~OA(9d><(9eRlnOH-@oi!vw0aCD&vQ2uow`W!b z-u>bBrJ$T0&KL1ZqkeBd_XEzvg(?k+vw_sOu{}7rkS+A`a$^G{$OQykXUz$m%H-m{ zq5dE#B89;4SE&=^q?p1vES4LS!{WF>r-Y)^-mb23ad9r_##G4(rHe`v<%&jekx&Xb z$f(qmRHf8<<3d6}2zhKAZbhCOE+PeTl~BfGd$YV*IH4ru30jiJig)v7#k1LNxPs(K z$aq{EBLK<+S8EeM72bS}h{tweffBrMg*2WN@sUL~Xz(?JGK%D}-EpBrqf!ujP-Wwk zhLp>tTA)1RJw-wa2w}8>#{vk1Xqie$@;Gc~HX9ccT8gjM%A{%}VK5M_=BvbFElKg1 z9GsG9Kts#pz)h=C$p{Iw_??|@xK=I|0+`MY7OvFj2N6mY1ciV|l@zIw6Cecr0(p#v zNZ<=q3N=ANTL?aaj7h2l6CkJ|kc?PEC`c`MOTbqr04){qI6xC3LXD;&;0vUL77ZfR zkot+mkA1q~j@&4qZGg}5+m?4U`^L${#70xaluv6Y- z<6)QGX1ZcRB+6Mjf3!J@KMePLRM$%)*x_jYq?#{4*EVC$}+G|{$xx`M~S zc0=P7?W*6EuKF2e_*uqj>h9%X^vGM+ym5>%_PXE1XZK2+n){}$;Ve5;ZJqIp<0mUD zG8|1DDp&f4+aCG$m#Yf`=BMplqMr;lJ>9t|D8t)t|8coZOXx=DynWLiZ)QF0KXd)D zp97`upZwT2BXpkg{jIm1#KD6Hj!3vUTp${!5YE2L>;`H`woD=7P-Zam9|xx(!VX2`on6)XTd1qRXL| zMp?cewx6_1^OZi^yq5mn(bYZ}En|Ak?uAEXvvE##{+{3ql@Ifr0>(Hsj#`$NBU$yN zyrzDbZ1$zb1<4zy*=jciZGDt%Jt}a2@fvUlefHgXSxaI*Q)GX-q{+TQ;dQYo{pQG> zW;%XJ=C=-0Ey`Gx6-zAUE(*B0HtDp@W6iA@CqA&w|0J9-v@|xitkL$1=}z0@#|JK0 zd84_OzGD1@L$p1&D+j;roRGfTyWpJ&D~E7~$)p#HFGkwbv zS?g2Yrtj82NZ;xiv+&H;fGt*+`~@=M}WN3L_#)rG?9jnexq(K_AZJweuEY zzvG$b9m{RFabXST=J87X?WQzdNDV$Nv&!9-5~=w7=+e*;4{XY)-^>S@p1Dy!=Fz9u zYSX`)Q?l*{(`Uanh}_mnZAXU<-CutAR>JUH)sxDNNe!Z{*WN3rE~;nA1e-YI>cr7E4Tt#YTToVH)8&@9fBh 
zwO3AEyr)j0-pYEMGDCX*WEC@W*eK_sU(Mx9=`0ni=KRmi`2{tOm(`fLSMq`U5Bg~D z*Os63PgBh=<4Haoy=2kTv~6Y)B?tKRsi*fVDf<;U{pxeR&ROVj`-4S=7rvQRS$Gm> z^xYnP%j+K(L<1etO>}^WHt0ujGq)FgZJWpTx!*JwScC@LiogSl|2eB7Xjd=u`om>a zzUxd#7pqp&Lzas?+X@c{7WlqZz1ix(N?D}I$Em?#+hSL>*sYTF+V&ZfUEj<5PQLS_ zn2}#CKSbPZlX2w%Q{!d5jzkVS-Q*roIjN<>bnm7r`A--YGt_GwuMWAS*Ozq6XM+VF zG7$!58u9rYOZcX>{QPu-{kB|xoqTN4`cnsQ>79ps+VA+JgQuEP&(&B7b+LJS6?av& zr*2HO*fL!Z4|{9;SI!fkxjo$;Sx9SY`0>t>clOx!L1t+M3)}g76A_7i{pVsBHqLjq zgWNy2?a+?SZUgiAn;kZ6-~8QWIrbnt(l^B~J_$a0pL4;epit2nRI$>PAT@%PLQsCW zf0F91F{aueBwX5)&~*@E`Rj1OTRz-XqFt1IjEwDK`dXy6etOiJ${0&jt=O3gz7%Q6d0An`b1-=pIoTe5unKgEHP&l-+!}<5egV^^2)H z)+5ReU*`0P5(%+<_k3?G%5J{A{3?_y4Je=Vh_c!s)M^aMYGYA$`7&krt57ED0i^JZ zK$$Yh2$Z`Wj78by%M`y?p)~mNZ;qEnMWSxLOg0kbn|Rsn-s`y0*nhrz-o(r9H$Bi} zP#}Ov^d?@uiI;zIdj5Oy(#eotzA9caup#K*M*7UsxSZ{8ij;uQn9LIJp}$eS{+tr< z8Iw~!_uuP|j!wQtWs}eS_qy{)z-LTc?DF4h&L9C_X9lTiZ%gm^o`1=Q9Shg*KizB2 z9RXiw?ikhHmSX)9WQObL1j@7Ee7xqY5k9U*b!3fU?QNcu{|X=dvH_kiBx2)V_;9_F U?fqPhhW>31UweWc^VRBq0gu;|yZ`_I literal 0 HcmV?d00001 diff --git a/biological-accession-crosswalk-guard/reports/summary.svg b/biological-accession-crosswalk-guard/reports/summary.svg new file mode 100644 index 00000000..605eab39 --- /dev/null +++ b/biological-accession-crosswalk-guard/reports/summary.svg @@ -0,0 +1,31 @@ + + + + Biological accession crosswalk guard + kg-biological-accession-demo • hold-for-curation + + Readiness score + + + 0/100 + + + Findings + + + 8 + + + Required actions + + + 8 + + Top curator checks + • invalid-accession-format for node-gene-p53-alias + • deprecated-accession-alias for node-gene-p53-alias + • deprecated-accession-alias for node-protein-old + • taxon-mismatch for node-protein-old + • missing-doi-evidence for node-protein-old + Synthetic KG data only. No external ontology, registry, or live recommendation calls. 
// Synthetic demo packet consumed by demo.js and render-video.js.
// It mixes two clean nodes with three nodes that each carry at least one
// problem, so the generated reports show several finding types at once.
const samplePacket = {
  graphId: 'kg-biological-accession-demo',
  // Fixed timestamp, so regenerated reports do not change between runs.
  generatedAt: '2026-05-23T07:00:00Z',
  nodes: [
    {
      // Well-formed gene node; shares its canonicalTarget with the alias
      // node below.
      id: 'node-gene-tp53',
      namespace: 'NCBIGene',
      accession: '7157',
      canonicalTarget: 'NCBIGene:7157',
      taxon: '9606',
      expectedTaxon: '9606',
      evidenceDois: ['10.1016/j.cell.2026.01.001'],
      crosswalkConfidence: 0.98,
    },
    {
      // Gene symbol stored where a numeric NCBIGene accession is expected;
      // also flagged as a deprecated alias and used by a recommendation.
      id: 'node-gene-p53-alias',
      namespace: 'NCBIGene',
      accession: 'TP53',
      canonicalTarget: 'NCBIGene:7157',
      taxon: '9606',
      expectedTaxon: '9606',
      deprecatedAlias: true,
      evidenceDois: ['10.1016/j.cell.2026.01.001'],
      crosswalkConfidence: 0.72,
      usedInRecommendation: true,
    },
    {
      // Protein node with differing taxon/expectedTaxon, no DOI evidence and
      // a confidence of 0.61.
      id: 'node-protein-old',
      namespace: 'UniProtKB',
      accession: 'Q9Y261',
      canonicalTarget: 'UniProtKB:Q9Y261',
      taxon: '10090',
      expectedTaxon: '9606',
      deprecatedAlias: true,
      evidenceDois: [],
      crosswalkConfidence: 0.61,
    },
    {
      // Compound node; no taxon fields are set.
      id: 'node-compound-aspirin',
      namespace: 'PubChem',
      accession: '2244',
      canonicalTarget: 'PubChem:2244',
      evidenceDois: ['10.1038/s41586-026-0001-2'],
      crosswalkConfidence: 0.97,
    },
    {
      // MeSH descriptor node; no taxon fields are set.
      id: 'node-mesh-breast-neoplasms',
      namespace: 'MeSH',
      accession: 'D001943',
      canonicalTarget: 'MeSH:D001943',
      evidenceDois: ['10.1126/science.2026.0007'],
      crosswalkConfidence: 0.95,
    },
  ],
};
// A malformed accession and two nodes sharing one canonical target are both
// critical, so the graph is held. The alias node's 0.72 confidence is not
// reported because confidence is only checked for well-formed accessions.
test('holds malformed accessions and duplicate canonical targets', () => {
  const result = evaluateBiologicalAccessionCrosswalk({
    graphId: 'kg-bio-crosswalk-risk',
    generatedAt: '2026-05-23T07:00:00Z',
    nodes: [
      {
        id: 'node-gene-tp53',
        namespace: 'NCBIGene',
        accession: '7157',
        canonicalTarget: 'NCBIGene:7157',
        taxon: '9606',
        evidenceDois: ['10.1016/j.cell.2026.01.001'],
        crosswalkConfidence: 0.98,
      },
      {
        id: 'node-gene-p53-alias',
        namespace: 'NCBIGene',
        accession: 'TP53',
        canonicalTarget: 'NCBIGene:7157',
        taxon: '9606',
        evidenceDois: ['10.1016/j.cell.2026.01.001'],
        crosswalkConfidence: 0.72,
      },
    ],
  });

  assert.equal(result.decision, 'hold-for-curation');
  assert.equal(result.summary.nodeCount, 2);
  assert.equal(result.summary.formatIssues, 1);
  assert.equal(result.summary.duplicateTargets, 1);
  assert.deepEqual(
    result.findings.map((finding) => finding.type),
    ['invalid-accession-format', 'duplicate-canonical-target']
  );
  assert.equal(result.requiredActions[0].type, 'fix_accession_namespace');
});

// Only major findings: the graph needs review but is not held.
test('requires review for deprecated aliases, taxon mismatch, and weak evidence', () => {
  const result = evaluateBiologicalAccessionCrosswalk({
    graphId: 'kg-bio-crosswalk-review',
    generatedAt: '2026-05-23T07:00:00Z',
    nodes: [
      {
        id: 'node-protein-old',
        namespace: 'UniProtKB',
        accession: 'Q9Y261',
        canonicalTarget: 'UniProtKB:Q9Y261',
        taxon: '10090',
        expectedTaxon: '9606',
        deprecatedAlias: true,
        evidenceDois: [],
        crosswalkConfidence: 0.61,
      },
    ],
  });

  assert.equal(result.decision, 'needs-curator-review');
  assert.equal(result.summary.aliasIssues, 1);
  assert.equal(result.summary.taxonIssues, 1);
  assert.equal(result.summary.evidenceIssues, 2);
  assert.deepEqual(
    result.findings.map((finding) => finding.type),
    ['deprecated-accession-alias', 'taxon-mismatch', 'missing-doi-evidence', 'low-crosswalk-confidence']
  );
  assert.equal(result.requiredActions.at(-1).type, 'raise_crosswalk_confidence');
});

// A recommendation that depends on a node with a finding gets its own
// suppression finding, appended after every other finding.
test('suppresses unsafe recommendations with unresolved crosswalk findings', () => {
  const result = evaluateBiologicalAccessionCrosswalk({
    graphId: 'kg-recommendation-risk',
    generatedAt: '2026-05-23T07:00:00Z',
    nodes: [
      {
        id: 'node-compound-aspirin',
        namespace: 'PubChem',
        accession: '2244',
        canonicalTarget: 'PubChem:2244',
        taxon: null,
        evidenceDois: ['10.1038/s41586-026-0001-2'],
        crosswalkConfidence: 0.97,
      },
      {
        id: 'node-mesh-bad',
        namespace: 'MeSH',
        accession: 'bad-mesh',
        canonicalTarget: 'MeSH:D001241',
        taxon: null,
        evidenceDois: ['10.1038/s41586-026-0001-2'],
        crosswalkConfidence: 0.93,
        usedInRecommendation: true,
      },
    ],
  });

  assert.equal(result.decision, 'hold-for-curation');
  assert.equal(result.summary.recommendationSuppressions, 1);
  assert.equal(result.findings[0].type, 'invalid-accession-format');
  assert.equal(result.requiredActions.at(-1).type, 'suppress_unsafe_recommendation');
});

test('approves clean crosswalks and builds deterministic reviewer packet', () => {
  // Kept in a variable so the same input can be evaluated twice below.
  const input = {
    graphId: 'kg-ready-crosswalk',
    generatedAt: '2026-05-23T07:00:00Z',
    nodes: [
      {
        id: 'node-gene-brca1',
        namespace: 'NCBIGene',
        accession: '672',
        canonicalTarget: 'NCBIGene:672',
        taxon: '9606',
        expectedTaxon: '9606',
        evidenceDois: ['10.1126/science.2026.0007'],
        crosswalkConfidence: 0.99,
      },
      {
        id: 'node-protein-brca1',
        namespace: 'UniProtKB',
        accession: 'P38398',
        canonicalTarget: 'UniProtKB:P38398',
        taxon: '9606',
        expectedTaxon: '9606',
        evidenceDois: ['10.1126/science.2026.0007'],
        crosswalkConfidence: 0.97,
      },
      {
        id: 'node-mesh-breast-neoplasms',
        namespace: 'MeSH',
        accession: 'D001943',
        canonicalTarget: 'MeSH:D001943',
        evidenceDois: ['10.1126/science.2026.0007'],
        crosswalkConfidence: 0.95,
      },
    ],
  };
  const result = evaluateBiologicalAccessionCrosswalk(input);

  assert.equal(result.decision, 'approved');
  assert.equal(result.readinessScore, 100);
  assert.equal(result.findings.length, 0);

  const packet = buildReviewerPacket(result);
  assert.match(packet, /# Biological Accession Crosswalk Guard Report/);
  assert.match(packet, /Graph: kg-ready-crosswalk/);
  assert.match(packet, /Decision: approved/);
  assert.match(packet, /Readiness score: 100/);
  assert.match(packet, /Findings: 0/);

  // The title promises determinism: with a fixed generatedAt, a second
  // evaluation must reproduce the result, its digest and the rendered report.
  const repeat = evaluateBiologicalAccessionCrosswalk(input);
  assert.deepEqual(repeat, result);
  assert.equal(repeat.auditDigest, result.auditDigest);
  assert.equal(buildReviewerPacket(repeat), packet);
});