diff --git a/manuscript-certainty-tone-assistant/README.md b/manuscript-certainty-tone-assistant/README.md new file mode 100644 index 00000000..e73deb69 --- /dev/null +++ b/manuscript-certainty-tone-assistant/README.md @@ -0,0 +1,24 @@ +# Manuscript Certainty Tone Assistant + +Self-contained AI peer-review aid for SCIBASE issue #13. + +The assistant checks whether manuscript wording matches the strength of the linked evidence. It flags overconfident causal language, definitive or universal claims, broad population scope from small samples, unsupported significance wording, missing uncertainty intervals, missing hedging, and missing limitation anchors. + +## Run + +```bash +npm run check +npm test +npm run demo +``` + +The demo writes reviewer artifacts to `reports/`: + +- `certainty-tone-packet.json` +- `certainty-tone-report.md` +- `summary.svg` +- `demo.mp4` + +## Scope + +This slice is intentionally narrow. It is not another broad AI research tool suite, evidence summarizer, citation metadata/context/diversity/style checker, manuscript similarity detector, ethics/data availability audit, unit consistency checker, biomethods provenance assistant, or general statistical consistency module. It focuses on one review question: does the manuscript's certainty and tone fit its evidence? diff --git a/manuscript-certainty-tone-assistant/acceptance-notes.md b/manuscript-certainty-tone-assistant/acceptance-notes.md new file mode 100644 index 00000000..0ab4a06d --- /dev/null +++ b/manuscript-certainty-tone-assistant/acceptance-notes.md @@ -0,0 +1,8 @@ +# Acceptance Notes + +- Uses synthetic fixture data only. +- Makes no external API calls. +- Stores no credentials, tokens, private manuscripts, live DOI records, PubMed/arXiv/Crossref data, or payment data. +- Emits deterministic audit digests for reviewer packets. +- Keeps policy thresholds in `sample-data.js` so scientific review teams can tune evidence ranks and wording rules. 
+- Provides focused tests for pass, revise, and hold outcomes. diff --git a/manuscript-certainty-tone-assistant/demo.js b/manuscript-certainty-tone-assistant/demo.js new file mode 100644 index 00000000..81108a73 --- /dev/null +++ b/manuscript-certainty-tone-assistant/demo.js @@ -0,0 +1,83 @@ +const fs = require("node:fs") +const path = require("node:path") +const { spawnSync } = require("node:child_process") +const { evaluatePortfolio } = require("./index") +const { manuscripts, tonePolicy } = require("./sample-data") + +const reportsDir = path.join(__dirname, "reports") +fs.mkdirSync(reportsDir, { recursive: true }) + +const packet = evaluatePortfolio({ manuscripts, policy: tonePolicy }) +const { summary } = packet + +fs.writeFileSync( + path.join(reportsDir, "certainty-tone-packet.json"), + `${JSON.stringify(packet, null, 2)}\n`, +) + +const markdown = [ + "# Manuscript Certainty Tone Report", + "", + `Generated manuscripts: ${summary.totalManuscripts}`, + `Pass: ${summary.pass}`, + `Revise: ${summary.revise}`, + `Hold: ${summary.hold}`, + `Claims reviewed: ${summary.totalClaims}`, + `Blockers: ${summary.totalBlockers}`, + `Warnings: ${summary.totalWarnings}`, + `Audit digest: \`${packet.audit.digest}\``, + "", + "## Manuscript Decisions", + ...packet.decisions.flatMap((decision) => [ + "", + `### ${decision.id}: ${decision.title}`, + `- Status: ${decision.status}`, + `- Claims: ${decision.summary.claims}`, + `- Blockers: ${decision.summary.blockers}`, + `- Warnings: ${decision.summary.warnings}`, + `- Reviewer actions: ${decision.reviewerActions.map((action) => action.code).join(", ") || "none"}`, + `- First suggested tone: ${decision.claimDecisions[0]?.suggestedTone || "none"}`, + ]), + "", +] + +fs.writeFileSync(path.join(reportsDir, "certainty-tone-report.md"), markdown.join("\n")) + +const svg = ` + + Manuscript Certainty Tone Assistant + AI peer-review guard for evidence-matched scientific wording + + ${summary.pass} + pass + + ${summary.revise} + revise 
// Core evaluation logic for the manuscript certainty tone assistant.
// Relies on the file-level `crypto` binding (node:crypto) for audit digests.

/** Fixed timestamp used when the caller does not supply one, so demo packets stay reproducible. */
const DEFAULT_GENERATED_AT = "2026-05-22T20:00:00.000Z"

/**
 * Wording rewrites applied to claims flagged for causal or definitive overreach.
 * Singular/plural verb forms are kept apart so the rewritten sentence stays grammatical.
 * The patterns are global; String.prototype.replace resets lastIndex, so reuse is safe.
 */
const STRONG_WORDING_REWRITES = [
  [/\bproves\b/gi, "suggests"],
  [/\bprove\b/gi, "suggest"],
  [/\bcauses?\b/gi, "is associated with"],
  [/\beliminates?\b/gi, "may reduce"],
  [/\bdefinitively\b/gi, "with current evidence"],
  [/\balways\b/gi, "in the tested settings"],
  [/\bguarantees?\b/gi, "may support"],
]

/**
 * Maps finding codes to the reviewer action they trigger.
 * Array order is the order in which actions are reported.
 */
const REVIEWER_ACTION_RULES = [
  {
    triggers: ["CLAIM_EVIDENCE_MISSING"],
    action: { code: "ADD_EVIDENCE_ANCHORS", owner: "author", message: "Link every strong manuscript claim to source evidence spans before AI review signoff." },
  },
  {
    triggers: ["CAUSAL_LANGUAGE_UNDERPOWERED", "DEFINITIVE_TONE_UNSUPPORTED"],
    action: { code: "CALIBRATE_STRONG_CLAIMS", owner: "author", message: "Rewrite causal or definitive statements to match the study design and evidence strength." },
  },
  {
    triggers: ["SIGNIFICANCE_STATISTIC_MISSING", "SIGNIFICANCE_LANGUAGE_CONFLICT", "UNCERTAINTY_INTERVAL_MISSING"],
    action: { code: "REPAIR_STATISTICAL_TONE", owner: "statistics-reviewer", message: "Align significance language with reported tests, alpha threshold, and uncertainty intervals." },
  },
  {
    triggers: ["BROAD_SCOPE_SAMPLE_TOO_SMALL", "UNIVERSAL_SCOPE_NOT_REPLICATED"],
    action: { code: "NARROW_POPULATION_SCOPE", owner: "domain-reviewer", message: "Limit population-wide language to the studied cohort or add replication evidence." },
  },
  {
    triggers: ["LIMITATION_LINK_MISSING", "UNCERTAINTY_LANGUAGE_MISSING"],
    action: { code: "ADD_LIMITATIONS_AND_HEDGING", owner: "author", message: "Add limitation anchors and calibrated uncertainty language for lower-strength evidence." },
  },
]

/**
 * Serialize a value as JSON with object keys sorted, so equal data always yields equal text.
 * Mirrors JSON.stringify for unserializable values: they are dropped from objects and
 * rendered as null inside arrays (the previous version emitted the token `undefined`).
 *
 * @param {*} value - Plain data (objects, arrays, primitives).
 * @returns {string|undefined} Canonical JSON text, or undefined when the value itself is unserializable.
 */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map((item) => stableJson(item) ?? "null").join(",")}]`
  }
  if (value && typeof value === "object") {
    const members = []
    for (const key of Object.keys(value).sort()) {
      const encoded = stableJson(value[key])
      if (encoded !== undefined) {
        members.push(`${JSON.stringify(key)}:${encoded}`)
      }
    }
    return `{${members.join(",")}}`
  }
  return JSON.stringify(value)
}

/**
 * Compute the SHA-256 hex digest of a value's canonical JSON form.
 *
 * @param {*} value - Data to fingerprint.
 * @returns {string} 64-character lowercase hex digest.
 */
function digestFor(value) {
  return crypto.createHash("sha256").update(stableJson(value)).digest("hex")
}

/**
 * Build a finding record.
 *
 * @param {string} code - Stable machine-readable finding code.
 * @param {string} severity - "blocker" or "warning".
 * @param {string} message - Human-readable explanation.
 * @param {object} [detail] - Supporting data for reviewers.
 * @returns {{code: string, severity: string, message: string, detail: object}}
 */
function finding(code, severity, message, detail = {}) {
  return { code, severity, message, detail }
}

/** @returns {boolean} True when value is a string containing non-whitespace characters. */
function hasText(value) {
  return typeof value === "string" && value.trim().length > 0
}

/**
 * Test whether text contains any of the phrases as whole words, case-insensitively.
 * Whitespace inside a phrase matches any whitespace run, so a phrase split across a
 * line wrap still matches. Blank phrases are ignored instead of matching everything.
 *
 * @param {string} text - Text to scan.
 * @param {string[]} phrases - Candidate phrases from the policy.
 * @returns {boolean}
 */
function includesAny(text, phrases) {
  if (!hasText(text) || !Array.isArray(phrases)) {
    return false
  }
  return phrases.some((phrase) => {
    if (!hasText(phrase)) {
      return false
    }
    const pattern = phrase
      .trim()
      .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
      .replace(/\s+/g, "\\s+")
    return new RegExp(`(^|[^a-z0-9])${pattern}([^a-z0-9]|$)`, "i").test(text)
  })
}

/** Index a manuscript's evidence items by id; a missing evidence list yields an empty map. */
function evidenceById(manuscript) {
  const items = Array.isArray(manuscript?.evidence) ? manuscript.evidence : []
  return new Map(items.map((item) => [item.id, item]))
}

/** Resolve a claim's evidenceIds to evidence items, dropping ids the manuscript does not define. */
function linkedEvidence(claim, manuscript) {
  const byId = evidenceById(manuscript)
  const ids = Array.isArray(claim.evidenceIds) ? claim.evidenceIds : []
  return ids.map((id) => byId.get(id)).filter(Boolean)
}

/** Look up the policy rank for an evidence strength label; unknown labels rank 0. */
function rankFor(strength, policy) {
  const rank = policy.evidenceRank?.[strength]
  return Number.isFinite(rank) ? rank : 0
}

/** Highest policy rank among the claim's resolved evidence, or 0 when none resolves. */
function strongestEvidenceRank(claim, manuscript, policy) {
  const ranks = linkedEvidence(claim, manuscript).map((item) => rankFor(item.strength, policy))
  return ranks.length === 0 ? 0 : Math.max(...ranks)
}

/** Strength labels of the claim's resolved evidence, in evidenceIds order. */
function evidenceLabels(claim, manuscript) {
  return linkedEvidence(claim, manuscript).map((item) => item.strength)
}

/**
 * Check one claim's wording against its linked evidence and the review policy.
 *
 * @param {object} claim - Claim with id, text, intent, evidenceIds, sampleSize, pValue,
 *   confidenceInterval, replicationContexts and limitationsLinked (all but id optional).
 * @param {object} manuscript - Manuscript owning the claim; its `evidence` list is consulted.
 * @param {object} policy - Thresholds, evidence ranks and phrase lists.
 * @returns {Array<object>} Findings in a fixed rule order; empty when the claim is calibrated.
 */
function evaluateClaimCertainty(claim, manuscript, policy) {
  const findings = []
  const evidenceRank = strongestEvidenceRank(claim, manuscript, policy)
  const evidenceKinds = evidenceLabels(claim, manuscript)
  const declaredEvidenceIds = Array.isArray(claim.evidenceIds) ? claim.evidenceIds : []
  const contexts = Array.isArray(claim.replicationContexts) ? claim.replicationContexts : []
  const sampleSize = Number.isFinite(claim.sampleSize) ? claim.sampleSize : null
  // NaN is not a reported statistic, so only finite numbers count as a p-value.
  const hasPValue = Number.isFinite(claim.pValue)
  const text = hasText(claim.text) ? claim.text : ""
  const certaintyHit = includesAny(text, policy.overconfidentTerms)
  const universalHit = includesAny(text, policy.universalTerms)
  const causalHit = claim.intent === "causal" || includesAny(text, policy.causalTerms)
  const significanceHit = includesAny(text, policy.significanceTerms)
  const uncertaintyHit = includesAny(text, policy.uncertaintyTerms)
  const broadScopeHit = includesAny(text, policy.broadScopeTerms)

  // Ids that resolve to no evidence item are as unsupported as having no ids at all.
  if (evidenceKinds.length === 0) {
    const detail = { claimId: claim.id }
    if (declaredEvidenceIds.length > 0) {
      detail.unresolvedEvidenceIds = declaredEvidenceIds
    }
    findings.push(finding("CLAIM_EVIDENCE_MISSING", "blocker", "Claim has no linked evidence span.", detail))
  }

  if (causalHit && evidenceRank < policy.minEvidenceRankForCausalClaim) {
    findings.push(finding("CAUSAL_LANGUAGE_UNDERPOWERED", "blocker", "Causal language is stronger than the linked study design supports.", {
      claimId: claim.id,
      evidenceKinds,
      requiredRank: policy.minEvidenceRankForCausalClaim,
    }))
  }

  if (certaintyHit && evidenceRank < policy.minEvidenceRankForDefinitiveClaim) {
    findings.push(finding("DEFINITIVE_TONE_UNSUPPORTED", "blocker", "Definitive manuscript wording is not supported by high-strength evidence.", {
      claimId: claim.id,
      evidenceKinds,
    }))
  }

  if (universalHit && contexts.length < policy.minContextsForUniversalClaim) {
    findings.push(finding("UNIVERSAL_SCOPE_NOT_REPLICATED", "warning", "Universal or population-wide wording needs more replication context.", {
      claimId: claim.id,
      contexts,
    }))
  }

  // An unreported sample size cannot back broad wording, so it is flagged like a small one.
  if (broadScopeHit && (sampleSize === null || sampleSize < policy.minSampleForBroadPopulationClaim)) {
    findings.push(finding("BROAD_SCOPE_SAMPLE_TOO_SMALL", "warning", "Broad population wording is not matched by the sample size.", {
      claimId: claim.id,
      sampleSize,
      requiredSample: policy.minSampleForBroadPopulationClaim,
    }))
  }

  if (significanceHit && !hasPValue) {
    findings.push(finding("SIGNIFICANCE_STATISTIC_MISSING", "blocker", "Significance wording needs a reported p-value or equivalent statistical test.", {
      claimId: claim.id,
    }))
  }

  // A non-significant p-value is only a conflict when the wording asserts an effect;
  // a hedged claim that honestly reports p >= alpha must not be held.
  if (hasPValue && claim.pValue >= policy.alpha && (significanceHit || certaintyHit || causalHit)) {
    const message = significanceHit
      ? "Manuscript says the result is significant but the p-value is not below the policy alpha."
      : "Strongly worded claim relies on a p-value that is not below the policy alpha."
    findings.push(finding("SIGNIFICANCE_LANGUAGE_CONFLICT", "blocker", message, {
      claimId: claim.id,
      pValue: claim.pValue,
      alpha: policy.alpha,
    }))
  }

  if (significanceHit && !claim.confidenceInterval) {
    findings.push(finding("UNCERTAINTY_INTERVAL_MISSING", "warning", "Significance wording should include a confidence or credible interval for calibration.", {
      claimId: claim.id,
    }))
  }

  if (evidenceRank <= policy.lowEvidenceRank && !uncertaintyHit) {
    findings.push(finding("UNCERTAINTY_LANGUAGE_MISSING", "warning", "Lower-strength evidence should use calibrated uncertainty language.", {
      claimId: claim.id,
      evidenceKinds,
    }))
  }

  if (!claim.limitationsLinked && (certaintyHit || causalHit || broadScopeHit)) {
    findings.push(finding("LIMITATION_LINK_MISSING", "warning", "Strongly worded claim should link to a limitation or boundary note.", {
      claimId: claim.id,
    }))
  }

  return findings
}

/**
 * Propose calibrated wording for a claim based on its findings.
 * Precedence: strong-wording rewrite, then scope boundary note, then hedging prefix.
 *
 * @param {object} claim - Claim whose `text` is rewritten.
 * @param {Array<object>} findings - Findings produced for the claim.
 * @returns {string} Suggested text; the original text when nothing applies, "" when the claim has no text.
 */
function suggestedTone(claim, findings) {
  if (!hasText(claim.text)) {
    return ""
  }
  const codes = new Set(findings.map((item) => item.code))

  // Causal and definitive overreach share one rewrite table so a claim flagged for
  // either gets every applicable softening (e.g. "proves" on a definitive-only claim).
  if (codes.has("CAUSAL_LANGUAGE_UNDERPOWERED") || codes.has("DEFINITIVE_TONE_UNSUPPORTED")) {
    return STRONG_WORDING_REWRITES.reduce(
      (draft, [pattern, replacement]) => draft.replace(pattern, replacement),
      claim.text,
    )
  }
  if (codes.has("BROAD_SCOPE_SAMPLE_TOO_SMALL") || codes.has("UNIVERSAL_SCOPE_NOT_REPLICATED")) {
    return `${claim.text} Boundary: this statement should be limited to the studied cohort and replication settings.`
  }
  if (codes.has("UNCERTAINTY_LANGUAGE_MISSING")) {
    return `Current evidence suggests that ${claim.text.charAt(0).toLowerCase()}${claim.text.slice(1)}`
  }
  return claim.text
}

/**
 * Derive the de-duplicated reviewer actions implied by a manuscript's findings.
 *
 * @param {Array<object>} claimDecisions - Claim decisions, each carrying a `findings` array.
 * @returns {Array<{code: string, owner: string, message: string}>} Actions in rule order.
 */
function buildReviewerActions(claimDecisions) {
  const codes = new Set(
    claimDecisions.flatMap((decision) => decision.findings).map((item) => item.code),
  )
  return REVIEWER_ACTION_RULES
    .filter((rule) => rule.triggers.some((code) => codes.has(code)))
    .map((rule) => ({ ...rule.action }))
}

/** Count findings of the given severity. */
function countSeverity(findings, severity) {
  return findings.filter((item) => item.severity === severity).length
}

/** Any blocker means "hold"; otherwise any warning means "revise"; otherwise "pass". */
function statusFor(blockerCount, warningCount) {
  if (blockerCount > 0) {
    return "hold"
  }
  return warningCount > 0 ? "revise" : "pass"
}

/**
 * Evaluate every claim in a manuscript and roll the results up into one decision.
 *
 * @param {object} manuscript - Manuscript with id, title, field, evidence and claims.
 * @param {object} policy - Review policy passed through to evaluateClaimCertainty.
 * @returns {object} Decision with status, summary counts, per-claim decisions,
 *   reviewer actions and an audit digest.
 */
function evaluateManuscript(manuscript, policy) {
  const claims = Array.isArray(manuscript.claims) ? manuscript.claims : []
  const claimDecisions = claims.map((claim) => {
    const findings = evaluateClaimCertainty(claim, manuscript, policy)

    return {
      id: claim.id,
      section: claim.section,
      intent: claim.intent,
      text: claim.text,
      status: statusFor(countSeverity(findings, "blocker"), countSeverity(findings, "warning")),
      suggestedTone: suggestedTone(claim, findings),
      evidenceStrengths: evidenceLabels(claim, manuscript),
      findings,
      auditDigest: digestFor({ claim, findings }),
    }
  })

  const allFindings = claimDecisions.flatMap((decision) => decision.findings)
  const blockerCount = countSeverity(allFindings, "blocker")
  const warningCount = countSeverity(allFindings, "warning")
  const reviewerActions = buildReviewerActions(claimDecisions)
  const withStatus = (status) => claimDecisions.filter((decision) => decision.status === status).length

  return {
    id: manuscript.id,
    title: manuscript.title,
    field: manuscript.field,
    status: statusFor(blockerCount, warningCount),
    summary: {
      claims: claimDecisions.length,
      passedClaims: withStatus("pass"),
      claimsNeedingRevision: withStatus("revise"),
      heldClaims: withStatus("hold"),
      blockers: blockerCount,
      warnings: warningCount,
    },
    claimDecisions,
    reviewerActions,
    auditDigest: digestFor({ manuscript, claimDecisions, reviewerActions }),
  }
}

/**
 * Evaluate a set of manuscripts and assemble the reviewer packet.
 *
 * @param {object} options
 * @param {Array<object>} options.manuscripts - Manuscripts to evaluate.
 * @param {object} options.policy - Review policy.
 * @param {string|number|Date} [options.generatedAt] - Packet timestamp; defaults to a fixed
 *   instant so output is reproducible. It is not part of the audit digest.
 * @returns {object} Packet with summary, per-manuscript decisions and audit digest.
 * @throws {RangeError} When generatedAt is not a valid date.
 */
function evaluatePortfolio({ manuscripts, policy, generatedAt = DEFAULT_GENERATED_AT }) {
  const decisions = manuscripts.map((manuscript) => evaluateManuscript(manuscript, policy))
  const withStatus = (status) => decisions.filter((decision) => decision.status === status).length
  const total = (field) => decisions.reduce((sum, decision) => sum + decision.summary[field], 0)
  const summary = {
    totalManuscripts: decisions.length,
    pass: withStatus("pass"),
    revise: withStatus("revise"),
    hold: withStatus("hold"),
    totalClaims: total("claims"),
    totalBlockers: total("blockers"),
    totalWarnings: total("warnings"),
  }

  return {
    generatedAt: new Date(generatedAt).toISOString(),
    assistant: "manuscript-certainty-tone-assistant",
    issue: "SCIBASE-AI/SCIBASE.AI#13",
    summary,
    decisions,
    audit: {
      digest: digestFor({ summary, decisions }),
    },
  }
}
"name": "manuscript-certainty-tone-assistant", + "version": "1.0.0", + "private": true, + "type": "commonjs", + "scripts": { + "check": "node --check index.js && node --check sample-data.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + } +} diff --git a/manuscript-certainty-tone-assistant/reports/certainty-tone-packet.json b/manuscript-certainty-tone-assistant/reports/certainty-tone-packet.json new file mode 100644 index 00000000..d257b845 --- /dev/null +++ b/manuscript-certainty-tone-assistant/reports/certainty-tone-packet.json @@ -0,0 +1,332 @@ +{ + "generatedAt": "2026-05-22T20:00:00.000Z", + "assistant": "manuscript-certainty-tone-assistant", + "issue": "SCIBASE-AI/SCIBASE.AI#13", + "summary": { + "totalManuscripts": 4, + "pass": 1, + "revise": 1, + "hold": 2, + "totalClaims": 4, + "totalBlockers": 5, + "totalWarnings": 9 + }, + "decisions": [ + { + "id": "MT-PASS-001", + "title": "Calibrated language for randomized medication adherence trial", + "field": "clinical informatics", + "status": "pass", + "summary": { + "claims": 1, + "passedClaims": 1, + "claimsNeedingRevision": 0, + "heldClaims": 0, + "blockers": 0, + "warnings": 0 + }, + "claimDecisions": [ + { + "id": "CLM-1", + "section": "Results", + "intent": "causal", + "text": "The intervention may reduce missed medication reminders in this cohort.", + "status": "pass", + "suggestedTone": "The intervention may reduce missed medication reminders in this cohort.", + "evidenceStrengths": [ + "randomized-controlled", + "controlled-experiment" + ], + "findings": [], + "auditDigest": "dbeb684bb327150322ab80d9160f35275fd581ccc1a085656999beaa1bde7f59" + } + ], + "reviewerActions": [], + "auditDigest": "7c41cd3da852392ef2c662b75f5ff99d4b991f269db9894877bbc2db21581579" + }, + { + "id": "MT-HOLD-002", + "title": "Observational nutrition dashboard draft", + "field": "public health", + "status": "hold", + "summary": { + "claims": 1, + "passedClaims": 0, + 
"claimsNeedingRevision": 0, + "heldClaims": 1, + "blockers": 3, + "warnings": 4 + }, + "claimDecisions": [ + { + "id": "CLM-2", + "section": "Abstract", + "intent": "causal", + "text": "The dashboard proves the intervention eliminates readmission risk for all patients.", + "status": "hold", + "suggestedTone": "The dashboard is associated with the intervention may reduce readmission risk for all patients.", + "evidenceStrengths": [ + "observational" + ], + "findings": [ + { + "code": "CAUSAL_LANGUAGE_UNDERPOWERED", + "severity": "blocker", + "message": "Causal language is stronger than the linked study design supports.", + "detail": { + "claimId": "CLM-2", + "evidenceKinds": [ + "observational" + ], + "requiredRank": 5 + } + }, + { + "code": "DEFINITIVE_TONE_UNSUPPORTED", + "severity": "blocker", + "message": "Definitive manuscript wording is not supported by high-strength evidence.", + "detail": { + "claimId": "CLM-2", + "evidenceKinds": [ + "observational" + ] + } + }, + { + "code": "UNIVERSAL_SCOPE_NOT_REPLICATED", + "severity": "warning", + "message": "Universal or population-wide wording needs more replication context.", + "detail": { + "claimId": "CLM-2", + "contexts": [ + "single-hospital" + ] + } + }, + { + "code": "BROAD_SCOPE_SAMPLE_TOO_SMALL", + "severity": "warning", + "message": "Broad population wording is not matched by the sample size.", + "detail": { + "claimId": "CLM-2", + "sampleSize": 47, + "requiredSample": 120 + } + }, + { + "code": "SIGNIFICANCE_LANGUAGE_CONFLICT", + "severity": "blocker", + "message": "Manuscript says the result is significant but the p-value is not below the policy alpha.", + "detail": { + "claimId": "CLM-2", + "pValue": 0.08, + "alpha": 0.05 + } + }, + { + "code": "UNCERTAINTY_LANGUAGE_MISSING", + "severity": "warning", + "message": "Lower-strength evidence should use calibrated uncertainty language.", + "detail": { + "claimId": "CLM-2", + "evidenceKinds": [ + "observational" + ] + } + }, + { + "code": 
"LIMITATION_LINK_MISSING", + "severity": "warning", + "message": "Strongly worded claim should link to a limitation or boundary note.", + "detail": { + "claimId": "CLM-2" + } + } + ], + "auditDigest": "1f6ee76f1b5f405fd53e1901c5b2fc0b2d9ad28983d58179fd1debfb909426ef" + } + ], + "reviewerActions": [ + { + "code": "CALIBRATE_STRONG_CLAIMS", + "owner": "author", + "message": "Rewrite causal or definitive statements to match the study design and evidence strength." + }, + { + "code": "REPAIR_STATISTICAL_TONE", + "owner": "statistics-reviewer", + "message": "Align significance language with reported tests, alpha threshold, and uncertainty intervals." + }, + { + "code": "NARROW_POPULATION_SCOPE", + "owner": "domain-reviewer", + "message": "Limit population-wide language to the studied cohort or add replication evidence." + }, + { + "code": "ADD_LIMITATIONS_AND_HEDGING", + "owner": "author", + "message": "Add limitation anchors and calibrated uncertainty language for lower-strength evidence." + } + ], + "auditDigest": "29968f13189fb1e1beb70e589da8df9a7fb35cf286f6fabc6fe094ace033cb7c" + }, + { + "id": "MT-REV-003", + "title": "Pilot assay automation manuscript", + "field": "bioengineering", + "status": "revise", + "summary": { + "claims": 1, + "passedClaims": 0, + "claimsNeedingRevision": 1, + "heldClaims": 0, + "blockers": 0, + "warnings": 2 + }, + "claimDecisions": [ + { + "id": "CLM-3", + "section": "Discussion", + "intent": "associational", + "text": "The workflow shows a significant improvement in calibration throughput for researchers.", + "status": "revise", + "suggestedTone": "The workflow shows a significant improvement in calibration throughput for researchers. 
Boundary: this statement should be limited to the studied cohort and replication settings.", + "evidenceStrengths": [ + "controlled-experiment" + ], + "findings": [ + { + "code": "BROAD_SCOPE_SAMPLE_TOO_SMALL", + "severity": "warning", + "message": "Broad population wording is not matched by the sample size.", + "detail": { + "claimId": "CLM-3", + "sampleSize": 32, + "requiredSample": 120 + } + }, + { + "code": "UNCERTAINTY_INTERVAL_MISSING", + "severity": "warning", + "message": "Significance wording should include a confidence or credible interval for calibration.", + "detail": { + "claimId": "CLM-3" + } + } + ], + "auditDigest": "01449394f0202dbf0a877b514280e0bc1401995a50dbfe5a5164513b49c015af" + } + ], + "reviewerActions": [ + { + "code": "REPAIR_STATISTICAL_TONE", + "owner": "statistics-reviewer", + "message": "Align significance language with reported tests, alpha threshold, and uncertainty intervals." + }, + { + "code": "NARROW_POPULATION_SCOPE", + "owner": "domain-reviewer", + "message": "Limit population-wide language to the studied cohort or add replication evidence." 
+ } + ], + "auditDigest": "5797f1a5b24209e619a96f3a0fa58ec43c06a2d4047336cd5f74753988672687" + }, + { + "id": "MT-HOLD-004", + "title": "Citation-free AI literature overview", + "field": "machine learning", + "status": "hold", + "summary": { + "claims": 1, + "passedClaims": 0, + "claimsNeedingRevision": 0, + "heldClaims": 1, + "blockers": 2, + "warnings": 3 + }, + "claimDecisions": [ + { + "id": "CLM-4", + "section": "Key findings", + "intent": "summative", + "text": "The method definitively settles benchmark uncertainty across every model family.", + "status": "hold", + "suggestedTone": "The method with current evidence settles benchmark uncertainty across every model family.", + "evidenceStrengths": [], + "findings": [ + { + "code": "CLAIM_EVIDENCE_MISSING", + "severity": "blocker", + "message": "Claim has no linked evidence span.", + "detail": { + "claimId": "CLM-4" + } + }, + { + "code": "DEFINITIVE_TONE_UNSUPPORTED", + "severity": "blocker", + "message": "Definitive manuscript wording is not supported by high-strength evidence.", + "detail": { + "claimId": "CLM-4", + "evidenceKinds": [] + } + }, + { + "code": "UNIVERSAL_SCOPE_NOT_REPLICATED", + "severity": "warning", + "message": "Universal or population-wide wording needs more replication context.", + "detail": { + "claimId": "CLM-4", + "contexts": [] + } + }, + { + "code": "UNCERTAINTY_LANGUAGE_MISSING", + "severity": "warning", + "message": "Lower-strength evidence should use calibrated uncertainty language.", + "detail": { + "claimId": "CLM-4", + "evidenceKinds": [] + } + }, + { + "code": "LIMITATION_LINK_MISSING", + "severity": "warning", + "message": "Strongly worded claim should link to a limitation or boundary note.", + "detail": { + "claimId": "CLM-4" + } + } + ], + "auditDigest": "74b6bf15fae267a68055929ec490e66bac8f0b322801d64c40e714026a51f3fc" + } + ], + "reviewerActions": [ + { + "code": "ADD_EVIDENCE_ANCHORS", + "owner": "author", + "message": "Link every strong manuscript claim to source 
evidence spans before AI review signoff." + }, + { + "code": "CALIBRATE_STRONG_CLAIMS", + "owner": "author", + "message": "Rewrite causal or definitive statements to match the study design and evidence strength." + }, + { + "code": "NARROW_POPULATION_SCOPE", + "owner": "domain-reviewer", + "message": "Limit population-wide language to the studied cohort or add replication evidence." + }, + { + "code": "ADD_LIMITATIONS_AND_HEDGING", + "owner": "author", + "message": "Add limitation anchors and calibrated uncertainty language for lower-strength evidence." + } + ], + "auditDigest": "7ab033902bc2057a65ee8fa87dafda7238cb623f3466d1486a02ce47bf8cb0d7" + } + ], + "audit": { + "digest": "c7c97bc2d4fd068a989c3d30c82a0858f42d97d7ea3a51b6829d7064e8f76a1a" + } +} diff --git a/manuscript-certainty-tone-assistant/reports/certainty-tone-report.md b/manuscript-certainty-tone-assistant/reports/certainty-tone-report.md new file mode 100644 index 00000000..161b0694 --- /dev/null +++ b/manuscript-certainty-tone-assistant/reports/certainty-tone-report.md @@ -0,0 +1,44 @@ +# Manuscript Certainty Tone Report + +Generated manuscripts: 4 +Pass: 1 +Revise: 1 +Hold: 2 +Claims reviewed: 4 +Blockers: 5 +Warnings: 9 +Audit digest: `c7c97bc2d4fd068a989c3d30c82a0858f42d97d7ea3a51b6829d7064e8f76a1a` + +## Manuscript Decisions + +### MT-PASS-001: Calibrated language for randomized medication adherence trial +- Status: pass +- Claims: 1 +- Blockers: 0 +- Warnings: 0 +- Reviewer actions: none +- First suggested tone: The intervention may reduce missed medication reminders in this cohort. + +### MT-HOLD-002: Observational nutrition dashboard draft +- Status: hold +- Claims: 1 +- Blockers: 3 +- Warnings: 4 +- Reviewer actions: CALIBRATE_STRONG_CLAIMS, REPAIR_STATISTICAL_TONE, NARROW_POPULATION_SCOPE, ADD_LIMITATIONS_AND_HEDGING +- First suggested tone: The dashboard is associated with the intervention may reduce readmission risk for all patients. 
+ +### MT-REV-003: Pilot assay automation manuscript +- Status: revise +- Claims: 1 +- Blockers: 0 +- Warnings: 2 +- Reviewer actions: REPAIR_STATISTICAL_TONE, NARROW_POPULATION_SCOPE +- First suggested tone: The workflow shows a significant improvement in calibration throughput for researchers. Boundary: this statement should be limited to the studied cohort and replication settings. + +### MT-HOLD-004: Citation-free AI literature overview +- Status: hold +- Claims: 1 +- Blockers: 2 +- Warnings: 3 +- Reviewer actions: ADD_EVIDENCE_ANCHORS, CALIBRATE_STRONG_CLAIMS, NARROW_POPULATION_SCOPE, ADD_LIMITATIONS_AND_HEDGING +- First suggested tone: The method with current evidence settles benchmark uncertainty across every model family. diff --git a/manuscript-certainty-tone-assistant/reports/demo.mp4 b/manuscript-certainty-tone-assistant/reports/demo.mp4 new file mode 100644 index 00000000..4979480f Binary files /dev/null and b/manuscript-certainty-tone-assistant/reports/demo.mp4 differ diff --git a/manuscript-certainty-tone-assistant/reports/summary.svg b/manuscript-certainty-tone-assistant/reports/summary.svg new file mode 100644 index 00000000..4cbbbc5a --- /dev/null +++ b/manuscript-certainty-tone-assistant/reports/summary.svg @@ -0,0 +1,16 @@ + + + Manuscript Certainty Tone Assistant + AI peer-review guard for evidence-matched scientific wording + + 1 + pass + + 1 + revise tone + + 2 + hold claims + Checks: causal language, definitive wording, universal scope, significance claims, uncertainty intervals, limitations. + Digest c7c97bc2d4fd068a989c3d30... + diff --git a/manuscript-certainty-tone-assistant/requirements-map.md b/manuscript-certainty-tone-assistant/requirements-map.md new file mode 100644 index 00000000..0166bb0e --- /dev/null +++ b/manuscript-certainty-tone-assistant/requirements-map.md @@ -0,0 +1,16 @@ +# Requirements Map + +Issue: SCIBASE-AI/SCIBASE.AI#13, AI-Assisted Research Tools MVP. 
+ +| Requirement | Coverage | +|---|---| +| AI Peer Review Aid | `evaluatePortfolio()` and `evaluateManuscript()` produce reviewer-ready decisions for draft manuscripts. | +| Grammar, clarity, and tone analysis | `evaluateClaimCertainty()` checks overconfident terms, universal scope, causal language, and missing uncertainty language. | +| Statistical error detection | Significance wording is checked against p-values, alpha threshold, and confidence interval coverage. | +| Compliance checks | Strong claims are required to link limitation anchors and evidence spans before reviewer signoff. | +| Customizable templates | `tonePolicy` centralizes thresholds, evidence ranks, phrase lists, alpha, replication context, and sample-size limits. | +| Immediate MVP value | `npm run demo` emits JSON, Markdown, SVG, and MP4 artifacts that a reviewer can inspect without external services. | + +## Non-Overlap + +This module does not implement summarization, citation recommendation, citation formatting, citation context fit, manuscript similarity triage, ethics/data availability checks, unit consistency, biomethods provenance, image integrity, or broad statistical review. It calibrates wording strength against evidence strength and reviewer policy. 
diff --git a/manuscript-certainty-tone-assistant/sample-data.js b/manuscript-certainty-tone-assistant/sample-data.js new file mode 100644 index 00000000..afd39d39 --- /dev/null +++ b/manuscript-certainty-tone-assistant/sample-data.js @@ -0,0 +1,114 @@ +const tonePolicy = { + alpha: 0.05, + evidenceRank: { + "expert-opinion": 1, + "case-series": 2, + "observational": 3, + "controlled-experiment": 4, + "randomized-controlled": 5, + "systematic-review": 6, + }, + lowEvidenceRank: 3, + minEvidenceRankForCausalClaim: 5, + minEvidenceRankForDefinitiveClaim: 5, + minContextsForUniversalClaim: 3, + minSampleForBroadPopulationClaim: 120, + overconfidentTerms: ["proves", "definitively", "guarantees", "eliminates", "settles"], + universalTerms: ["always", "all patients", "all users", "never fails", "across every"], + causalTerms: ["causes", "caused", "prevents", "eliminates", "drives", "leads to"], + significanceTerms: ["significant", "statistically significant", "robust effect"], + uncertaintyTerms: ["may", "might", "suggests", "is associated with", "appears", "in this cohort"], + broadScopeTerms: ["patients", "clinicians", "researchers", "all users", "population-wide"], +} + +const manuscripts = [ + { + id: "MT-PASS-001", + title: "Calibrated language for randomized medication adherence trial", + field: "clinical informatics", + evidence: [ + { id: "EV-RCT-PRIMARY", strength: "randomized-controlled", description: "Two-arm randomized trial with prespecified endpoint." }, + { id: "EV-REPLICATION", strength: "controlled-experiment", description: "External site replication packet." 
}, + ], + claims: [ + { + id: "CLM-1", + section: "Results", + intent: "causal", + text: "The intervention may reduce missed medication reminders in this cohort.", + evidenceIds: ["EV-RCT-PRIMARY", "EV-REPLICATION"], + sampleSize: 214, + pValue: 0.018, + confidenceInterval: "risk ratio 0.74, 95% CI 0.60-0.91", + replicationContexts: ["clinic-a", "clinic-b", "clinic-c"], + limitationsLinked: true, + }, + ], + }, + { + id: "MT-HOLD-002", + title: "Observational nutrition dashboard draft", + field: "public health", + evidence: [ + { id: "EV-OBS-COHORT", strength: "observational", description: "Single-site retrospective cohort." }, + ], + claims: [ + { + id: "CLM-2", + section: "Abstract", + intent: "causal", + text: "The dashboard proves the intervention eliminates readmission risk for all patients.", + evidenceIds: ["EV-OBS-COHORT"], + sampleSize: 47, + pValue: 0.08, + confidenceInterval: null, + replicationContexts: ["single-hospital"], + limitationsLinked: false, + }, + ], + }, + { + id: "MT-REV-003", + title: "Pilot assay automation manuscript", + field: "bioengineering", + evidence: [ + { id: "EV-PILOT", strength: "controlled-experiment", description: "Bench assay pilot with two batches." 
}, + ], + claims: [ + { + id: "CLM-3", + section: "Discussion", + intent: "associational", + text: "The workflow shows a significant improvement in calibration throughput for researchers.", + evidenceIds: ["EV-PILOT"], + sampleSize: 32, + pValue: 0.031, + confidenceInterval: null, + replicationContexts: ["bench-a", "bench-b"], + limitationsLinked: true, + }, + ], + }, + { + id: "MT-HOLD-004", + title: "Citation-free AI literature overview", + field: "machine learning", + evidence: [], + claims: [ + { + id: "CLM-4", + section: "Key findings", + intent: "summative", + text: "The method definitively settles benchmark uncertainty across every model family.", + evidenceIds: [], + sampleSize: 0, + pValue: null, + confidenceInterval: null, + replicationContexts: [], + limitationsLinked: false, + }, + ], + }, +] + +module.exports = { manuscripts, tonePolicy } diff --git a/manuscript-certainty-tone-assistant/test.js b/manuscript-certainty-tone-assistant/test.js new file mode 100644 index 00000000..6c894d5e --- /dev/null +++ b/manuscript-certainty-tone-assistant/test.js @@ -0,0 +1,41 @@ +const assert = require("node:assert/strict") +const { evaluateManuscript, evaluatePortfolio } = require("./index") +const { manuscripts, tonePolicy } = require("./sample-data") + +const packet = evaluatePortfolio({ manuscripts, policy: tonePolicy }) + +assert.equal(packet.summary.totalManuscripts, 4) +assert.equal(packet.summary.pass, 1) +assert.equal(packet.summary.revise, 1) +assert.equal(packet.summary.hold, 2) +assert.equal(packet.summary.totalClaims, 4) +assert.equal(packet.summary.totalBlockers, 5) +assert.equal(packet.summary.totalWarnings, 9) +assert.match(packet.audit.digest, /^[a-f0-9]{64}$/) + +const pass = packet.decisions.find((decision) => decision.id === "MT-PASS-001") +assert.equal(pass.status, "pass") +assert.equal(pass.summary.passedClaims, 1) +assert.equal(pass.reviewerActions.length, 0) + +const overclaim = evaluateManuscript(manuscripts.find((item) => item.id === 
"MT-HOLD-002"), tonePolicy) +assert.equal(overclaim.status, "hold") +assert.ok(overclaim.claimDecisions[0].findings.some((finding) => finding.code === "CAUSAL_LANGUAGE_UNDERPOWERED")) +assert.ok(overclaim.claimDecisions[0].findings.some((finding) => finding.code === "DEFINITIVE_TONE_UNSUPPORTED")) +assert.ok(overclaim.claimDecisions[0].findings.some((finding) => finding.code === "SIGNIFICANCE_LANGUAGE_CONFLICT")) +assert.ok(overclaim.claimDecisions[0].findings.some((finding) => finding.code === "BROAD_SCOPE_SAMPLE_TOO_SMALL")) +assert.ok(overclaim.reviewerActions.some((action) => action.code === "CALIBRATE_STRONG_CLAIMS")) + +const pilot = packet.decisions.find((decision) => decision.id === "MT-REV-003") +assert.equal(pilot.status, "revise") +assert.ok(pilot.claimDecisions[0].findings.some((finding) => finding.code === "UNCERTAINTY_INTERVAL_MISSING")) +assert.ok(pilot.claimDecisions[0].findings.some((finding) => finding.code === "BROAD_SCOPE_SAMPLE_TOO_SMALL")) +assert.equal(pilot.reviewerActions[0].code, "REPAIR_STATISTICAL_TONE") + +const unsupported = packet.decisions.find((decision) => decision.id === "MT-HOLD-004") +assert.equal(unsupported.status, "hold") +assert.ok(unsupported.claimDecisions[0].findings.some((finding) => finding.code === "CLAIM_EVIDENCE_MISSING")) +assert.ok(unsupported.claimDecisions[0].findings.some((finding) => finding.code === "DEFINITIVE_TONE_UNSUPPORTED")) +assert.ok(unsupported.reviewerActions.some((action) => action.code === "ADD_EVIDENCE_ANCHORS")) + +console.log("manuscript-certainty-tone-assistant tests passed")