diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts
new file mode 100644
index 000000000..fd14394f6
--- /dev/null
+++ b/apps/website/app/api/ai/extract/route.ts
@@ -0,0 +1,201 @@
+import { NextRequest, NextResponse } from "next/server";
+import {
+  ExtractionRequestSchema,
+  EXTRACTION_RESULT_JSON_SCHEMA,
+  type ExtractionResponse,
+  type ProviderId,
+} from "~/types/extraction";
+import type { LLMProviderConfig, Message, Settings } from "~/types/llm";
+import {
+  anthropicConfig,
+  openaiConfig,
+  geminiConfig,
+} from "~/utils/llm/providers";
+import {
+  DEFAULT_EXTRACTION_PROMPT,
+  buildUserPrompt,
+} from "~/prompts/extraction";
+import { parseExtractionResponse } from "~/utils/ai/parseExtractionResponse";
+
+export const runtime = "nodejs";
+export const maxDuration = 300;
+
+const PROVIDER_CONFIGS: Record<ProviderId, LLMProviderConfig> = {
+  anthropic: anthropicConfig,
+  openai: openaiConfig,
+  gemini: geminiConfig,
+};
+
+const buildExtractionMessages = ({
+  provider,
+  pdfBase64,
+  userPrompt,
+}: {
+  provider: ProviderId;
+  pdfBase64: string;
+  userPrompt: string;
+}): Message[] => {
+  switch (provider) {
+    case "anthropic":
+      return [
+        {
+          role: "user",
+          content: [
+            {
+              type: "document",
+              source: {
+                type: "base64",
+                media_type: "application/pdf", // eslint-disable-line @typescript-eslint/naming-convention
+                data: pdfBase64,
+              },
+            },
+            { type: "text", text: userPrompt },
+          ],
+        },
+      ];
+    case "openai":
+      return [
+        {
+          role: "user",
+          content: [
+            {
+              type: "file",
+              file: {
+                filename: "paper.pdf",
+                file_data: `data:application/pdf;base64,${pdfBase64}`, // eslint-disable-line @typescript-eslint/naming-convention
+              },
+            },
+            { type: "text", text: userPrompt },
+          ],
+        },
+      ];
+    case "gemini":
+      return [
+        {
+          role: "user",
+          content: [
+            {
+              inlineData: {
+                mimeType: "application/pdf",
+                data: pdfBase64,
+              },
+            },
+            { text: userPrompt },
+          ],
+        },
+      ];
+  }
+};
+
+export const POST = async (
+  request: NextRequest,
+): Promise<NextResponse<ExtractionResponse>> => {
+
let body: unknown; + try { + body = await request.json(); + } catch { + return NextResponse.json( + { success: false, error: "Invalid JSON body" }, + { status: 400 }, + ); + } + + const validated = ExtractionRequestSchema.safeParse(body); + if (!validated.success) { + return NextResponse.json( + { success: false, error: validated.error.message }, + { status: 400 }, + ); + } + + const { pdfBase64, researchQuestion, model, provider, systemPrompt } = + validated.data; + + const config = PROVIDER_CONFIGS[provider]; + const apiKey = process.env[config.apiKeyEnvVar]; + + if (!apiKey) { + return NextResponse.json( + { success: false, error: `API key not configured for ${provider}.` }, + { status: 500 }, + ); + } + + const messages = buildExtractionMessages({ + provider, + pdfBase64, + userPrompt: buildUserPrompt(researchQuestion), + }); + + const settings: Settings = { + model, + maxTokens: 16384, + temperature: 0.6, + systemPrompt: systemPrompt ?? DEFAULT_EXTRACTION_PROMPT, + outputSchema: EXTRACTION_RESULT_JSON_SCHEMA, + }; + + const apiUrl = + typeof config.apiUrl === "function" + ? 
config.apiUrl(settings) + : config.apiUrl; + + try { + const response = await fetch(apiUrl, { + method: "POST", + headers: config.apiHeaders(apiKey), + body: JSON.stringify(config.formatRequestBody(messages, settings)), + signal: AbortSignal.timeout(270_000), + }); + + if (!response.ok) { + const errorText = await response.text().catch(() => ""); + return NextResponse.json( + { + success: false, + error: `${provider} API error (${response.status}): ${errorText.slice(0, 200)}`, + }, + { status: 502 }, + ); + } + + const responseData: unknown = await response.json(); + const rawText = config.extractResponseText(responseData); + + if (!rawText) { + return NextResponse.json( + { success: false, error: `Empty response from ${provider}` }, + { status: 502 }, + ); + } + + let result; + try { + result = parseExtractionResponse(rawText); + } catch (parseError) { + const message = + parseError instanceof SyntaxError + ? "LLM returned invalid JSON" + : "LLM returned unexpected response structure"; + return NextResponse.json( + { + success: false, + error: `Failed to parse extraction response — ${message}`, + }, + { status: 502 }, + ); + } + + return NextResponse.json({ success: true, data: result }); + } catch (error) { + const message = + error instanceof Error + ? `Extraction failed — ${error.message}` + : "Extraction failed"; + console.error("AI extraction failed:", error); + return NextResponse.json( + { success: false, error: message }, + { status: 500 }, + ); + } +}; diff --git a/apps/website/app/prompts/extraction.ts b/apps/website/app/prompts/extraction.ts new file mode 100644 index 000000000..e1b97678b --- /dev/null +++ b/apps/website/app/prompts/extraction.ts @@ -0,0 +1,59 @@ +export const DEFAULT_EXTRACTION_PROMPT = `You are a research analyst extracting discourse graph nodes from academic papers. + +Extract discrete, atomic nodes from the paper. Each node is one idea: one claim, one observation, one question. 
+ +## Node Types + +- **Evidence**: A discrete observation from a published dataset or experiment cited in the paper (prior work). Past tense. Includes observable, model system, method. Quantitative details when available. +- **Claim**: An interpretive assertion by the authors. Debatable — goes beyond data to state what it means. Specific enough to test or argue against. +- **Question**: A research question — explicitly stated or implied by a gap in the literature. Open-ended. +- **Result**: A discrete observation from this paper's own experiments. Same structure as Evidence but from the current work, not prior studies. Past tense. +- **Theory**: A theoretical framework or model used or proposed. Name it, state its core proposition. +- **Source**: A cited publication. Author(s) and year. + +## Quality + +- Atomic: one idea per node. Split compound sentences. +- Self-contained: understandable without the paper. +- Faithful: no inference or editorializing. +- Specific: "X reduced Y by 43% in Z" not "X was effective." +- 8–25 nodes. Quality over quantity. Cover all sections. +- Evidence = prior work cited. Result = this paper's experiments. + +## Example + +Excerpt (Results): +"CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro (Fig 3A), longer than controls which declined after week 4 (p<0.001). This correlated with elevated CD62L and CCR7 (Fig 3B), suggesting a memory-like phenotype resisting exhaustion." 
+ +{ + "nodes": [ + { + "nodeType": "Result", + "content": "CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro, significantly longer than unedited controls which declined after week 4", + "supportSnippet": "CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro (Fig 3A), longer than controls which declined after week 4 (p<0.001)", + "sourceSection": "Results" + }, + { + "nodeType": "Result", + "content": "Sustained cytotoxic activity of CRISPR-edited T cells correlated with elevated CD62L and CCR7 expression", + "supportSnippet": "This correlated with elevated CD62L and CCR7 (Fig 3B)", + "sourceSection": "Results" + }, + { + "nodeType": "Claim", + "content": "CRISPR editing may promote a memory-like T cell phenotype that resists exhaustion", + "supportSnippet": "suggesting a memory-like phenotype resisting exhaustion", + "sourceSection": "Results" + } + ] +}`; + +export const buildUserPrompt = (researchQuestion?: string): string => { + let prompt = "Extract discourse graph nodes from the attached paper."; + + if (researchQuestion) { + prompt += `\n\nFocus extraction around this research question: ${researchQuestion}`; + } + + return prompt; +}; diff --git a/apps/website/app/types/extraction.ts b/apps/website/app/types/extraction.ts new file mode 100644 index 000000000..1bea620da --- /dev/null +++ b/apps/website/app/types/extraction.ts @@ -0,0 +1,59 @@ +import { z } from "zod"; + +export const PROVIDER_IDS = ["anthropic", "openai", "gemini"] as const; + +export type ProviderId = (typeof PROVIDER_IDS)[number]; + +// eslint-disable-next-line @typescript-eslint/naming-convention +export const ExtractedNodeSchema = z.object({ + nodeType: z.string(), + content: z.string(), + supportSnippet: z.string(), + sourceSection: z.string().nullable(), +}); + +export type ExtractedNode = z.infer; + +// eslint-disable-next-line @typescript-eslint/naming-convention +export const ExtractionResultSchema = z.object({ + nodes: 
z.array(ExtractedNodeSchema),
+});
+
+export type ExtractionResult = z.infer<typeof ExtractionResultSchema>;
+
+// eslint-disable-next-line @typescript-eslint/naming-convention
+export const ExtractionRequestSchema = z.object({
+  pdfBase64: z.string().min(1).max(44_000_000),
+  provider: z.enum(PROVIDER_IDS),
+  model: z.string().min(1),
+  researchQuestion: z.string().optional(),
+  systemPrompt: z.string().optional(),
+});
+
+export type ExtractionRequest = z.infer<typeof ExtractionRequestSchema>;
+
+export const EXTRACTION_RESULT_JSON_SCHEMA: Record<string, unknown> = {
+  type: "object",
+  properties: {
+    nodes: {
+      type: "array",
+      items: {
+        type: "object",
+        properties: {
+          nodeType: { type: "string" },
+          content: { type: "string" },
+          supportSnippet: { type: "string" },
+          sourceSection: { type: ["string", "null"] },
+        },
+        required: ["nodeType", "content", "supportSnippet", "sourceSection"],
+        additionalProperties: false,
+      },
+    },
+  },
+  required: ["nodes"],
+  additionalProperties: false,
+};
+
+export type ExtractionResponse =
+  | { success: true; data: ExtractionResult }
+  | { success: false; error: string };
diff --git a/apps/website/app/types/llm.ts b/apps/website/app/types/llm.ts
index b461fee47..a048671be 100644
--- a/apps/website/app/types/llm.ts
+++ b/apps/website/app/types/llm.ts
@@ -1,12 +1,16 @@
+export type ContentBlock = Record<string, unknown>;
+
 export type Message = {
   role: string;
-  content: string;
+  content: string | ContentBlock[];
 };
 
 export type Settings = {
   model: string;
   maxTokens: number;
   temperature: number;
+  systemPrompt?: string;
+  outputSchema?: Record<string, unknown>;
   safetySettings?: Array<{
     category: string;
     threshold: string;
diff --git a/apps/website/app/utils/ai/parseExtractionResponse.ts b/apps/website/app/utils/ai/parseExtractionResponse.ts
new file mode 100644
index 000000000..4ec7162de
--- /dev/null
+++ b/apps/website/app/utils/ai/parseExtractionResponse.ts
@@ -0,0 +1,9 @@
+import {
+  ExtractionResultSchema,
+  type ExtractionResult,
+} from "~/types/extraction";
+
+export const parseExtractionResponse = (raw: string):
ExtractionResult => { + const parsed: unknown = JSON.parse(raw); + return ExtractionResultSchema.parse(parsed); +}; diff --git a/apps/website/app/utils/llm/providers.ts b/apps/website/app/utils/llm/providers.ts index 8a94fec14..b8fb2e1c1 100644 --- a/apps/website/app/utils/llm/providers.ts +++ b/apps/website/app/utils/llm/providers.ts @@ -9,9 +9,26 @@ export const openaiConfig: LLMProviderConfig = { }), formatRequestBody: (messages: Message[], settings: Settings) => ({ model: settings.model, - messages: messages, + messages: [ + ...(settings.systemPrompt + ? [{ role: "system", content: settings.systemPrompt }] + : []), + ...messages.map((m) => ({ role: m.role, content: m.content })), + ], temperature: settings.temperature, max_completion_tokens: settings.maxTokens, + ...(settings.outputSchema && { + // eslint-disable-next-line @typescript-eslint/naming-convention + response_format: { + type: "json_schema", + // eslint-disable-next-line @typescript-eslint/naming-convention + json_schema: { + name: "extraction_result", + strict: true, + schema: settings.outputSchema, + }, + }, + }), }), extractResponseText: (responseData: any) => responseData.choices?.[0]?.message?.content, @@ -26,13 +43,21 @@ export const geminiConfig: LLMProviderConfig = { "Content-Type": "application/json", }), formatRequestBody: (messages: Message[], settings: Settings) => ({ + ...(settings.systemPrompt && { + systemInstruction: { parts: [{ text: settings.systemPrompt }] }, + }), contents: messages.map((msg) => ({ role: msg.role === "user" ? "user" : "model", - parts: [{ text: msg.content }], + parts: + typeof msg.content === "string" ? 
[{ text: msg.content }] : msg.content, })), generationConfig: { maxOutputTokens: settings.maxTokens, temperature: settings.temperature, + ...(settings.outputSchema && { + responseMimeType: "application/json", + responseJsonSchema: settings.outputSchema, + }), }, safetySettings: settings.safetySettings, }), @@ -52,8 +77,18 @@ export const anthropicConfig: LLMProviderConfig = { formatRequestBody: (messages: Message[], settings: Settings) => ({ model: settings.model, max_tokens: settings.maxTokens, - messages: messages, + messages: messages.map((m) => ({ role: m.role, content: m.content })), temperature: settings.temperature, + ...(settings.systemPrompt && { system: settings.systemPrompt }), + ...(settings.outputSchema && { + // eslint-disable-next-line @typescript-eslint/naming-convention + output_config: { + format: { + type: "json_schema", + schema: settings.outputSchema, + }, + }, + }), }), extractResponseText: (responseData: any) => responseData.content?.[0]?.text, errorMessagePath: "error?.message",