From c106ce12d1dd36e1d2bc1c977a5583e83196ca93 Mon Sep 17 00:00:00 2001 From: sid597 Date: Thu, 2 Apr 2026 22:49:39 +0530 Subject: [PATCH 1/6] ENG-1602: Add PDF extraction API route Multi-provider (Anthropic, OpenAI, Gemini) endpoint for extracting discourse graph nodes from uploaded PDFs. --- apps/website/app/api/ai/extract/route.ts | 240 ++++++++++++++++++ apps/website/app/prompts/extraction.ts | 74 ++++++ apps/website/app/types/extraction.ts | 37 +++ .../app/utils/ai/parseExtractionResponse.ts | 23 ++ 4 files changed, 374 insertions(+) create mode 100644 apps/website/app/api/ai/extract/route.ts create mode 100644 apps/website/app/prompts/extraction.ts create mode 100644 apps/website/app/types/extraction.ts create mode 100644 apps/website/app/utils/ai/parseExtractionResponse.ts diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts new file mode 100644 index 000000000..9dac2c0e8 --- /dev/null +++ b/apps/website/app/api/ai/extract/route.ts @@ -0,0 +1,240 @@ +import { NextRequest, NextResponse } from "next/server"; +import { + ExtractionRequestSchema, + type ExtractionResponse, + type ProviderId, +} from "~/types/extraction"; +import { + anthropicConfig, + openaiConfig, + geminiConfig, +} from "~/utils/llm/providers"; +import { + DEFAULT_EXTRACTION_PROMPT, + buildUserPrompt, +} from "~/prompts/extraction"; +import { parseExtractionResponse } from "~/utils/ai/parseExtractionResponse"; +import { z } from "zod"; + +export const runtime = "nodejs"; +export const maxDuration = 300; + +type ExtractionParams = { + model: string; + systemPrompt: string; + pdfBase64: string; + userPrompt: string; + apiKey: string; +}; + +type ProviderExtractionConfig = { + apiKeyEnvVar: string; + apiHeaders: (apiKey: string) => Record; + apiUrl: (params: ExtractionParams) => string; + buildRequestBody: (params: ExtractionParams) => unknown; + extractResponseText: (data: unknown) => string | null; +}; + +const openaiResponseSchema = z.object({ + output: z.array( + z.object({ + type: z.string(), + content: z + .array(z.object({ type: z.string(), text: z.string() })) + .optional(), + }), + ), +}); + +const PROVIDERS: Record = { + anthropic: { + apiKeyEnvVar: anthropicConfig.apiKeyEnvVar, + apiHeaders: anthropicConfig.apiHeaders, + apiUrl: () => "https://api.anthropic.com/v1/messages", + buildRequestBody: ({ model, systemPrompt, pdfBase64, userPrompt }) => ({ + model, + max_tokens: 16384, // eslint-disable-line @typescript-eslint/naming-convention + temperature: 0.2, + system: systemPrompt, + messages: [ + { + role: "user", + content: [ + { + type: "document", + source: { + type: "base64", + media_type: "application/pdf", // eslint-disable-line @typescript-eslint/naming-convention + data: pdfBase64, + }, + }, + { type: "text", text: userPrompt }, + ], + }, + ], + }), + extractResponseText: anthropicConfig.extractResponseText, + }, + openai: { + apiKeyEnvVar: openaiConfig.apiKeyEnvVar, + apiHeaders: openaiConfig.apiHeaders, + apiUrl: () => "https://api.openai.com/v1/responses", + buildRequestBody: ({ model, systemPrompt, pdfBase64, userPrompt }) => ({ + model, + instructions: systemPrompt, + input: [ + { + role: "user", + content: [ + { + type: "input_file", + filename: "paper.pdf", + file_data: `data:application/pdf;base64,${pdfBase64}`, // eslint-disable-line @typescript-eslint/naming-convention + }, + { type: "input_text", text: userPrompt }, + ], + }, + ], + temperature: 0.2, + max_output_tokens: 16384, // eslint-disable-line @typescript-eslint/naming-convention + }), + extractResponseText: (data: unknown) => { + const parsed = openaiResponseSchema.safeParse(data); + if (!parsed.success) return null; + const message = parsed.data.output.find((o) => o.type === "message"); + return ( + message?.content?.find((c) => c.type === "output_text")?.text ?? null + ); + }, + }, + gemini: { + apiKeyEnvVar: geminiConfig.apiKeyEnvVar, + apiHeaders: geminiConfig.apiHeaders, + apiUrl: ({ apiKey, model }) => + `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`, + buildRequestBody: ({ systemPrompt, pdfBase64, userPrompt }) => ({ + system_instruction: { parts: [{ text: systemPrompt }] }, // eslint-disable-line @typescript-eslint/naming-convention + contents: [ + { + role: "user", + parts: [ + { + inline_data: { mime_type: "application/pdf", data: pdfBase64 }, // eslint-disable-line @typescript-eslint/naming-convention + }, + { text: userPrompt }, + ], + }, + ], + generationConfig: { + temperature: 0.2, + maxOutputTokens: 16384, + responseMimeType: "application/json", + }, + }), + extractResponseText: geminiConfig.extractResponseText, + }, +}; + +export const POST = async ( + request: NextRequest, +): Promise> => { + let body: unknown; + try { + body = await request.json(); + } catch { + return NextResponse.json( + { success: false, error: "Invalid JSON body" }, + { status: 400 }, + ); + } + + const validated = ExtractionRequestSchema.safeParse(body); + if (!validated.success) { + return NextResponse.json( + { success: false, error: validated.error.message }, + { status: 400 }, + ); + } + + const { pdfBase64, researchQuestion, model, provider, systemPrompt } = + validated.data; + + const config = PROVIDERS[provider]; + const apiKey = process.env[config.apiKeyEnvVar]; + + if (!apiKey) { + return NextResponse.json( + { success: false, error: `API key not configured for ${provider}.` }, + { status: 500 }, + ); + } + + const resolvedSystemPrompt = systemPrompt ?? DEFAULT_EXTRACTION_PROMPT; + const userPrompt = buildUserPrompt(researchQuestion); + const params: ExtractionParams = { + model, + systemPrompt: resolvedSystemPrompt, + pdfBase64, + userPrompt, + apiKey, + }; + + try { + const response = await fetch(config.apiUrl(params), { + method: "POST", + headers: config.apiHeaders(apiKey), + body: JSON.stringify(config.buildRequestBody(params)), + signal: AbortSignal.timeout(270_000), + }); + + if (!response.ok) { + const errorText = await response.text().catch(() => ""); + return NextResponse.json( + { + success: false, + error: `${provider} API error (${response.status}): ${errorText.slice(0, 200)}`, + }, + { status: 502 }, + ); + } + + const responseData: unknown = await response.json(); + const rawText = config.extractResponseText(responseData); + + if (!rawText) { + return NextResponse.json( + { success: false, error: `Empty response from ${provider}` }, + { status: 502 }, + ); + } + + let result; + try { + result = parseExtractionResponse(rawText); + } catch (parseError) { + const message = + parseError instanceof SyntaxError + ? "LLM returned invalid JSON" + : "LLM returned unexpected response structure"; + return NextResponse.json( + { + success: false, + error: `Failed to parse extraction response — ${message}`, + }, + { status: 502 }, + ); + } + + return NextResponse.json({ success: true, data: result }); + } catch (error) { + const message = + error instanceof Error + ? `Extraction failed — ${error.message}` + : "Extraction failed"; + console.error("AI extraction failed:", error); + return NextResponse.json( + { success: false, error: message }, + { status: 500 }, + ); + } +}; diff --git a/apps/website/app/prompts/extraction.ts b/apps/website/app/prompts/extraction.ts new file mode 100644 index 000000000..7d263ca7c --- /dev/null +++ b/apps/website/app/prompts/extraction.ts @@ -0,0 +1,74 @@ +export const DEFAULT_EXTRACTION_PROMPT = `You are a research analyst extracting discourse graph nodes from academic papers. + +Extract discrete, atomic nodes from the paper. Each node is one idea: one claim, one observation, one question. + +## Node Types + +- **Evidence**: A discrete observation from a published dataset or experiment cited in the paper (prior work). Past tense. Includes observable, model system, method. Quantitative details when available. +- **Claim**: An interpretive assertion by the authors. Debatable — goes beyond data to state what it means. Specific enough to test or argue against. +- **Question**: A research question — explicitly stated or implied by a gap in the literature. Open-ended. +- **Result**: A discrete observation from this paper's own experiments. Same structure as Evidence but from the current work, not prior studies. Past tense. +- **Theory**: A theoretical framework or model used or proposed. Name it, state its core proposition. +- **Source**: A cited publication. Author(s) and year. + +## Output + +Return JSON only, no markdown fences: + +{ + "nodes": [ + { + "nodeType": "Evidence|Claim|Question|Result|Theory|Source", + "content": "clear, self-contained statement", + "supportSnippet": "exact quote or figure/table ref from paper, under 250 chars", + "sourceSection": "Introduction|Methods|Results|Discussion|etc" + } + ] +} + +## Quality + +- Atomic: one idea per node. Split compound sentences. +- Self-contained: understandable without the paper. +- Faithful: no inference or editorializing. +- Specific: "X reduced Y by 43% in Z" not "X was effective." +- 8–25 nodes. Quality over quantity. Cover all sections. +- Evidence = prior work cited. Result = this paper's experiments. + +## Example + +Excerpt (Results): +"CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro (Fig 3A), longer than controls which declined after week 4 (p<0.001). This correlated with elevated CD62L and CCR7 (Fig 3B), suggesting a memory-like phenotype resisting exhaustion." + +{ + "nodes": [ + { + "nodeType": "Result", + "content": "CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro, significantly longer than unedited controls which declined after week 4", + "supportSnippet": "CRISPR-edited T cells maintained cytotoxic activity for 12 weeks in vitro (Fig 3A), longer than controls which declined after week 4 (p<0.001)", + "sourceSection": "Results" + }, + { + "nodeType": "Result", + "content": "Sustained cytotoxic activity of CRISPR-edited T cells correlated with elevated CD62L and CCR7 expression", + "supportSnippet": "This correlated with elevated CD62L and CCR7 (Fig 3B)", + "sourceSection": "Results" + }, + { + "nodeType": "Claim", + "content": "CRISPR editing may promote a memory-like T cell phenotype that resists exhaustion", + "supportSnippet": "suggesting a memory-like phenotype resisting exhaustion", + "sourceSection": "Results" + } + ] +}`; + +export const buildUserPrompt = (researchQuestion?: string): string => { + let prompt = "Extract discourse graph nodes from the attached paper."; + + if (researchQuestion) { + prompt += `\n\nFocus extraction around this research question: ${researchQuestion}`; + } + + return prompt; +}; diff --git a/apps/website/app/types/extraction.ts b/apps/website/app/types/extraction.ts new file mode 100644 index 000000000..f5f1e6e6b --- /dev/null +++ b/apps/website/app/types/extraction.ts @@ -0,0 +1,37 @@ +import { z } from "zod"; + +export const PROVIDER_IDS = ["anthropic", "openai", "gemini"] as const; + +export type ProviderId = (typeof PROVIDER_IDS)[number]; + +// eslint-disable-next-line @typescript-eslint/naming-convention +export const ExtractedNodeSchema = z.object({ + nodeType: z.string(), + content: z.string(), + supportSnippet: z.string(), + sourceSection: z.string().optional(), +}); + +export type ExtractedNode = z.infer; + +// eslint-disable-next-line @typescript-eslint/naming-convention +export const ExtractionResultSchema = z.object({ + nodes: z.array(ExtractedNodeSchema), +}); + +export type ExtractionResult = z.infer; + +// eslint-disable-next-line @typescript-eslint/naming-convention +export const ExtractionRequestSchema = z.object({ + pdfBase64: z.string().min(1).max(44_000_000), + provider: z.enum(PROVIDER_IDS), + model: z.string().min(1), + researchQuestion: z.string().optional(), + systemPrompt: z.string().optional(), +}); + +export type ExtractionRequest = z.infer; + +export type ExtractionResponse = + | { success: true; data: ExtractionResult } + | { success: false; error: string }; diff --git a/apps/website/app/utils/ai/parseExtractionResponse.ts b/apps/website/app/utils/ai/parseExtractionResponse.ts new file mode 100644 index 000000000..def23d4ec --- /dev/null +++ b/apps/website/app/utils/ai/parseExtractionResponse.ts @@ -0,0 +1,23 @@ +import { + ExtractionResultSchema, + type ExtractionResult, +} from "~/types/extraction"; + +export const parseExtractionResponse = (raw: string): ExtractionResult => { + let cleaned = raw.trim(); + + if (cleaned.startsWith("```")) { + cleaned = cleaned + .replace(/^```(?:json)?\s*\n?/, "") + .replace(/\n?```\s*$/, ""); + } + + const firstBrace = cleaned.indexOf("{"); + const lastBrace = cleaned.lastIndexOf("}"); + if (firstBrace !== -1 && lastBrace > firstBrace) { + cleaned = cleaned.slice(firstBrace, lastBrace + 1); + } + + const parsed: unknown = JSON.parse(cleaned); + return ExtractionResultSchema.parse(parsed); +}; From 96c2e22315be830b6da79e81aa5556bf371c7c71 Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 3 Apr 2026 10:25:19 +0530 Subject: [PATCH 2/6] ENG-1602: Unify provider configs for chat and extraction Widen Message.content to support multimodal content blocks and add systemPrompt/responseMimeType to Settings. Each provider's formatRequestBody now handles both text-only chat and PDF extraction, eliminating the parallel PROVIDERS block in the extraction route. OpenAI extraction switches from Responses API to Chat Completions (now supports PDF). Gemini field casing fixed to match REST API docs. --- apps/website/app/api/ai/extract/route.ts | 152 +++++++++-------------- apps/website/app/types/llm.ts | 6 +- apps/website/app/utils/llm/providers.ts | 19 ++- 3 files changed, 77 insertions(+), 100 deletions(-) diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts index 9dac2c0e8..b104fa9ad 100644 --- a/apps/website/app/api/ai/extract/route.ts +++ b/apps/website/app/api/ai/extract/route.ts @@ -4,6 +4,7 @@ import { type ExtractionResponse, type ProviderId, } from "~/types/extraction"; +import type { LLMProviderConfig, Message, Settings } from "~/types/llm"; import { anthropicConfig, openaiConfig, @@ -14,49 +15,26 @@ import { buildUserPrompt, } from "~/prompts/extraction"; import { parseExtractionResponse } from "~/utils/ai/parseExtractionResponse"; -import { z } from "zod"; export const runtime = "nodejs"; export const maxDuration = 300; -type ExtractionParams = { - model: string; - systemPrompt: string; - pdfBase64: string; - userPrompt: string; - apiKey: string; +const PROVIDER_CONFIGS: Record = { + anthropic: anthropicConfig, + openai: openaiConfig, + gemini: geminiConfig, }; -type ProviderExtractionConfig = { - apiKeyEnvVar: string; - apiHeaders: (apiKey: string) => Record; - apiUrl: (params: ExtractionParams) => string; - buildRequestBody: (params: ExtractionParams) => unknown; - extractResponseText: (data: unknown) => string | null; -}; +const buildExtractionMessages = ( + provider: ProviderId, + pdfBase64: string, + userPrompt: string, +): Message[] => { + const textBlock = { type: "text", text: userPrompt }; -const openaiResponseSchema = z.object({ - output: z.array( - z.object({ - type: z.string(), - content: z - .array(z.object({ type: z.string(), text: z.string() })) - .optional(), - }), - ), -}); - -const PROVIDERS: Record = { - anthropic: { - apiKeyEnvVar: anthropicConfig.apiKeyEnvVar, - apiHeaders: anthropicConfig.apiHeaders, - apiUrl: () => "https://api.anthropic.com/v1/messages", - buildRequestBody: ({ model, systemPrompt, pdfBase64, userPrompt }) => ({ - model, - max_tokens: 16384, // eslint-disable-line @typescript-eslint/naming-convention - temperature: 0.2, - system: systemPrompt, - messages: [ + switch (provider) { + case "anthropic": + return [ { role: "user", content: [ @@ -68,71 +46,42 @@ const PROVIDERS: Record = { data: pdfBase64, }, }, - { type: "text", text: userPrompt }, + textBlock, ], }, - ], - }), - extractResponseText: anthropicConfig.extractResponseText, - }, - openai: { - apiKeyEnvVar: openaiConfig.apiKeyEnvVar, - apiHeaders: openaiConfig.apiHeaders, - apiUrl: () => "https://api.openai.com/v1/responses", - buildRequestBody: ({ model, systemPrompt, pdfBase64, userPrompt }) => ({ - model, - instructions: systemPrompt, - input: [ + ]; + case "openai": + return [ { role: "user", content: [ { - type: "input_file", - filename: "paper.pdf", - file_data: `data:application/pdf;base64,${pdfBase64}`, // eslint-disable-line @typescript-eslint/naming-convention + type: "file", + file: { + filename: "paper.pdf", + file_data: `data:application/pdf;base64,${pdfBase64}`, // eslint-disable-line @typescript-eslint/naming-convention + }, }, - { type: "input_text", text: userPrompt }, + textBlock, ], }, - ], - temperature: 0.2, - max_output_tokens: 16384, // eslint-disable-line @typescript-eslint/naming-convention - }), - extractResponseText: (data: unknown) => { - const parsed = openaiResponseSchema.safeParse(data); - if (!parsed.success) return null; - const message = parsed.data.output.find((o) => o.type === "message"); - return ( - message?.content?.find((c) => c.type === "output_text")?.text ?? null - ); - }, - }, - gemini: { - apiKeyEnvVar: geminiConfig.apiKeyEnvVar, - apiHeaders: geminiConfig.apiHeaders, - apiUrl: ({ apiKey, model }) => - `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`, - buildRequestBody: ({ systemPrompt, pdfBase64, userPrompt }) => ({ - system_instruction: { parts: [{ text: systemPrompt }] }, // eslint-disable-line @typescript-eslint/naming-convention - contents: [ + ]; + case "gemini": + return [ { role: "user", - parts: [ + content: [ { - inline_data: { mime_type: "application/pdf", data: pdfBase64 }, // eslint-disable-line @typescript-eslint/naming-convention + inlineData: { + mimeType: "application/pdf", + data: pdfBase64, + }, }, - { text: userPrompt }, + textBlock, ], }, - ], - generationConfig: { - temperature: 0.2, - maxOutputTokens: 16384, - responseMimeType: "application/json", - }, - }), - extractResponseText: geminiConfig.extractResponseText, - }, + ]; + } }; export const POST = async ( @@ -159,7 +108,7 @@ export const POST = async ( const { pdfBase64, researchQuestion, model, provider, systemPrompt } = validated.data; - const config = PROVIDERS[provider]; + const config = PROVIDER_CONFIGS[provider]; const apiKey = process.env[config.apiKeyEnvVar]; if (!apiKey) { @@ -169,21 +118,32 @@ export const POST = async ( ); } - const resolvedSystemPrompt = systemPrompt ?? DEFAULT_EXTRACTION_PROMPT; - const userPrompt = buildUserPrompt(researchQuestion); - const params: ExtractionParams = { - model, - systemPrompt: resolvedSystemPrompt, + const messages = buildExtractionMessages( + provider, pdfBase64, - userPrompt, - apiKey, + buildUserPrompt(researchQuestion), + ); + + const settings: Settings = { + model, + maxTokens: 16384, + temperature: 0.2, + systemPrompt: systemPrompt ?? DEFAULT_EXTRACTION_PROMPT, + ...(provider === "gemini" && { + responseMimeType: "application/json", + }), }; + const apiUrl = + typeof config.apiUrl === "function" + ? config.apiUrl(settings) + : config.apiUrl; + try { - const response = await fetch(config.apiUrl(params), { + const response = await fetch(apiUrl, { method: "POST", headers: config.apiHeaders(apiKey), - body: JSON.stringify(config.buildRequestBody(params)), + body: JSON.stringify(config.formatRequestBody(messages, settings)), signal: AbortSignal.timeout(270_000), }); diff --git a/apps/website/app/types/llm.ts b/apps/website/app/types/llm.ts index b461fee47..d5286374a 100644 --- a/apps/website/app/types/llm.ts +++ b/apps/website/app/types/llm.ts @@ -1,12 +1,16 @@ +export type ContentBlock = Record; + export type Message = { role: string; - content: string; + content: string | ContentBlock[]; }; export type Settings = { model: string; maxTokens: number; temperature: number; + systemPrompt?: string; + responseMimeType?: string; safetySettings?: Array<{ category: string; threshold: string; diff --git a/apps/website/app/utils/llm/providers.ts b/apps/website/app/utils/llm/providers.ts index 8a94fec14..348fcef5d 100644 --- a/apps/website/app/utils/llm/providers.ts +++ b/apps/website/app/utils/llm/providers.ts @@ -9,7 +9,12 @@ export const openaiConfig: LLMProviderConfig = { }), formatRequestBody: (messages: Message[], settings: Settings) => ({ model: settings.model, - messages: messages, + messages: [ + ...(settings.systemPrompt + ? [{ role: "system", content: settings.systemPrompt }] + : []), + ...messages.map((m) => ({ role: m.role, content: m.content })), + ], temperature: settings.temperature, max_completion_tokens: settings.maxTokens, }), @@ -26,13 +31,20 @@ export const geminiConfig: LLMProviderConfig = { "Content-Type": "application/json", }), formatRequestBody: (messages: Message[], settings: Settings) => ({ + ...(settings.systemPrompt && { + systemInstruction: { parts: [{ text: settings.systemPrompt }] }, + }), contents: messages.map((msg) => ({ role: msg.role === "user" ? "user" : "model", - parts: [{ text: msg.content }], + parts: + typeof msg.content === "string" ? [{ text: msg.content }] : msg.content, })), generationConfig: { maxOutputTokens: settings.maxTokens, temperature: settings.temperature, + ...(settings.responseMimeType && { + responseMimeType: settings.responseMimeType, + }), }, safetySettings: settings.safetySettings, }), @@ -52,8 +64,9 @@ export const anthropicConfig: LLMProviderConfig = { formatRequestBody: (messages: Message[], settings: Settings) => ({ model: settings.model, max_tokens: settings.maxTokens, - messages: messages, + messages: messages.map((m) => ({ role: m.role, content: m.content })), temperature: settings.temperature, + ...(settings.systemPrompt && { system: settings.systemPrompt }), }), extractResponseText: (responseData: any) => responseData.content?.[0]?.text, errorMessagePath: "error?.message", From a33805a0674514685308350c0b7f3afb7ebec2c7 Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 3 Apr 2026 11:09:03 +0530 Subject: [PATCH 3/6] ENG-1602: Enforce JSON schema output for all providers Add structured output enforcement via each provider's native mechanism: Anthropic output_config, OpenAI response_format with strict mode, Gemini responseJsonSchema. Removes prompt-based JSON instructions and response cleanup parsing since constrained decoding guarantees valid JSON. --- apps/website/app/api/ai/extract/route.ts | 7 +++--- apps/website/app/prompts/extraction.ts | 15 ------------ apps/website/app/types/extraction.ts | 24 ++++++++++++++++++- apps/website/app/types/llm.ts | 2 +- .../app/utils/ai/parseExtractionResponse.ts | 16 +------------ apps/website/app/utils/llm/providers.ts | 23 ++++++++++++++++-- 6 files changed, 49 insertions(+), 38 deletions(-) diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts index b104fa9ad..26de8bc9b 100644 --- a/apps/website/app/api/ai/extract/route.ts +++ b/apps/website/app/api/ai/extract/route.ts @@ -1,6 +1,7 @@ import { NextRequest, NextResponse } from "next/server"; import { ExtractionRequestSchema, + EXTRACTION_RESULT_JSON_SCHEMA, type ExtractionResponse, type ProviderId, } from "~/types/extraction"; @@ -127,11 +128,9 @@ export const POST = async ( const settings: Settings = { model, maxTokens: 16384, - temperature: 0.2, + temperature: 0.6, systemPrompt: systemPrompt ?? DEFAULT_EXTRACTION_PROMPT, - ...(provider === "gemini" && { - responseMimeType: "application/json", - }), + outputSchema: EXTRACTION_RESULT_JSON_SCHEMA, }; const apiUrl = diff --git a/apps/website/app/prompts/extraction.ts b/apps/website/app/prompts/extraction.ts index 7d263ca7c..e1b97678b 100644 --- a/apps/website/app/prompts/extraction.ts +++ b/apps/website/app/prompts/extraction.ts @@ -11,21 +11,6 @@ Extract discrete, atomic nodes from the paper. Each node is one idea: one claim, - **Theory**: A theoretical framework or model used or proposed. Name it, state its core proposition. - **Source**: A cited publication. Author(s) and year. -## Output - -Return JSON only, no markdown fences: - -{ - "nodes": [ - { - "nodeType": "Evidence|Claim|Question|Result|Theory|Source", - "content": "clear, self-contained statement", - "supportSnippet": "exact quote or figure/table ref from paper, under 250 chars", - "sourceSection": "Introduction|Methods|Results|Discussion|etc" - } - ] -} - ## Quality - Atomic: one idea per node. Split compound sentences. diff --git a/apps/website/app/types/extraction.ts b/apps/website/app/types/extraction.ts index f5f1e6e6b..1bea620da 100644 --- a/apps/website/app/types/extraction.ts +++ b/apps/website/app/types/extraction.ts @@ -9,7 +9,7 @@ export const ExtractedNodeSchema = z.object({ nodeType: z.string(), content: z.string(), supportSnippet: z.string(), - sourceSection: z.string().optional(), + sourceSection: z.string().nullable(), }); export type ExtractedNode = z.infer; @@ -32,6 +32,28 @@ export const ExtractionRequestSchema = z.object({ export type ExtractionRequest = z.infer; +export const EXTRACTION_RESULT_JSON_SCHEMA: Record = { + type: "object", + properties: { + nodes: { + type: "array", + items: { + type: "object", + properties: { + nodeType: { type: "string" }, + content: { type: "string" }, + supportSnippet: { type: "string" }, + sourceSection: { type: ["string", "null"] }, + }, + required: ["nodeType", "content", "supportSnippet", "sourceSection"], + additionalProperties: false, + }, + }, + }, + required: ["nodes"], + additionalProperties: false, +}; + export type ExtractionResponse = | { success: true; data: ExtractionResult } | { success: false; error: string }; diff --git a/apps/website/app/types/llm.ts b/apps/website/app/types/llm.ts index d5286374a..a048671be 100644 --- a/apps/website/app/types/llm.ts +++ b/apps/website/app/types/llm.ts @@ -10,7 +10,7 @@ export type Settings = { maxTokens: number; temperature: number; systemPrompt?: string; - responseMimeType?: string; + outputSchema?: Record; safetySettings?: Array<{ category: string; threshold: string; diff --git a/apps/website/app/utils/ai/parseExtractionResponse.ts b/apps/website/app/utils/ai/parseExtractionResponse.ts index def23d4ec..4ec7162de 100644 --- a/apps/website/app/utils/ai/parseExtractionResponse.ts +++ b/apps/website/app/utils/ai/parseExtractionResponse.ts @@ -4,20 +4,6 @@ import { } from "~/types/extraction"; export const parseExtractionResponse = (raw: string): ExtractionResult => { - let cleaned = raw.trim(); - - if (cleaned.startsWith("```")) { - cleaned = cleaned - .replace(/^```(?:json)?\s*\n?/, "") - .replace(/\n?```\s*$/, ""); - } - - const firstBrace = cleaned.indexOf("{"); - const lastBrace = cleaned.lastIndexOf("}"); - if (firstBrace !== -1 && lastBrace > firstBrace) { - cleaned = cleaned.slice(firstBrace, lastBrace + 1); - } - - const parsed: unknown = JSON.parse(cleaned); + const parsed: unknown = JSON.parse(raw); return ExtractionResultSchema.parse(parsed); }; diff --git a/apps/website/app/utils/llm/providers.ts b/apps/website/app/utils/llm/providers.ts index 348fcef5d..f71315980 100644 --- a/apps/website/app/utils/llm/providers.ts +++ b/apps/website/app/utils/llm/providers.ts @@ -17,6 +17,16 @@ export const openaiConfig: LLMProviderConfig = { ], temperature: settings.temperature, max_completion_tokens: settings.maxTokens, + ...(settings.outputSchema && { + response_format: { + type: "json_schema", + json_schema: { + name: "extraction_result", + strict: true, + schema: settings.outputSchema, + }, + }, + }), }), extractResponseText: (responseData: any) => responseData.choices?.[0]?.message?.content, @@ -42,8 +52,9 @@ export const geminiConfig: LLMProviderConfig = { generationConfig: { maxOutputTokens: settings.maxTokens, temperature: settings.temperature, - ...(settings.responseMimeType && { - responseMimeType: settings.responseMimeType, + ...(settings.outputSchema && { + responseMimeType: "application/json", + responseJsonSchema: settings.outputSchema, }), }, safetySettings: settings.safetySettings, @@ -67,6 +78,14 @@ export const anthropicConfig: LLMProviderConfig = { messages: messages.map((m) => ({ role: m.role, content: m.content })), temperature: settings.temperature, ...(settings.systemPrompt && { system: settings.systemPrompt }), + ...(settings.outputSchema && { + output_config: { + format: { + type: "json_schema", + schema: settings.outputSchema, + }, + }, + }), }), extractResponseText: (responseData: any) => responseData.content?.[0]?.text, errorMessagePath: "error?.message", From f7c7871a6b98df9a3499c11171a101221d2c342c Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 3 Apr 2026 11:11:03 +0530 Subject: [PATCH 4/6] ENG-1602: Use object destructuring for buildExtractionMessages Per AGENTS.md: functions with more than 2 parameters use named parameters via object destructuring. --- apps/website/app/api/ai/extract/route.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts index 26de8bc9b..16f9ec471 100644 --- a/apps/website/app/api/ai/extract/route.ts +++ b/apps/website/app/api/ai/extract/route.ts @@ -26,11 +26,15 @@ const PROVIDER_CONFIGS: Record = { gemini: geminiConfig, }; -const buildExtractionMessages = ( - provider: ProviderId, - pdfBase64: string, - userPrompt: string, -): Message[] => { +const buildExtractionMessages = ({ + provider, + pdfBase64, + userPrompt, +}: { + provider: ProviderId; + pdfBase64: string; + userPrompt: string; +}): Message[] => { const textBlock = { type: "text", text: userPrompt }; switch (provider) { @@ -119,11 +123,11 @@ export const POST = async ( ); } - const messages = buildExtractionMessages( + const messages = buildExtractionMessages({ provider, pdfBase64, - buildUserPrompt(researchQuestion), - ); + userPrompt: buildUserPrompt(researchQuestion), + }); const settings: Settings = { model, From e4916cda40daa2f7f6b9f02dee8137526b371450 Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 3 Apr 2026 11:28:57 +0530 Subject: [PATCH 5/6] ENG-1602: Add eslint-disable for API-required snake_case fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline disables for response_format, json_schema (OpenAI), and output_config (Anthropic) — external API contract names. --- apps/website/app/utils/llm/providers.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/website/app/utils/llm/providers.ts b/apps/website/app/utils/llm/providers.ts index f71315980..b8fb2e1c1 100644 --- a/apps/website/app/utils/llm/providers.ts +++ b/apps/website/app/utils/llm/providers.ts @@ -18,8 +18,10 @@ export const openaiConfig: LLMProviderConfig = { temperature: settings.temperature, max_completion_tokens: settings.maxTokens, ...(settings.outputSchema && { + // eslint-disable-next-line @typescript-eslint/naming-convention response_format: { type: "json_schema", + // eslint-disable-next-line @typescript-eslint/naming-convention json_schema: { name: "extraction_result", strict: true, @@ -79,6 +81,7 @@ export const anthropicConfig: LLMProviderConfig = { temperature: settings.temperature, ...(settings.systemPrompt && { system: settings.systemPrompt }), ...(settings.outputSchema && { + // eslint-disable-next-line @typescript-eslint/naming-convention output_config: { format: { type: "json_schema", From ed9bfcf94c4747db326e96154b0c6931616a3cde Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 3 Apr 2026 12:27:58 +0530 Subject: [PATCH 6/6] ENG-1602: Fix Gemini text part format Gemini parts use { text } not { type: "text", text }. The shared textBlock was using the Anthropic/OpenAI format which Gemini rejects. --- apps/website/app/api/ai/extract/route.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/apps/website/app/api/ai/extract/route.ts b/apps/website/app/api/ai/extract/route.ts index 16f9ec471..fd14394f6 100644 --- a/apps/website/app/api/ai/extract/route.ts +++ b/apps/website/app/api/ai/extract/route.ts @@ -35,8 +35,6 @@ const buildExtractionMessages = ({ pdfBase64: string; userPrompt: string; }): Message[] => { - const textBlock = { type: "text", text: userPrompt }; - switch (provider) { case "anthropic": return [ @@ -51,7 +49,7 @@ const buildExtractionMessages = ({ data: pdfBase64, }, }, - textBlock, + { type: "text", text: userPrompt }, ], }, ]; @@ -67,7 +65,7 @@ const buildExtractionMessages = ({ file_data: `data:application/pdf;base64,${pdfBase64}`, // eslint-disable-line @typescript-eslint/naming-convention }, }, - textBlock, + { type: "text", text: userPrompt }, ], }, ]; @@ -82,7 +80,7 @@ const buildExtractionMessages = ({ data: pdfBase64, }, }, - textBlock, + { text: userPrompt }, ], }, ];