diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 813121c..64905d9 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -1155,3 +1155,8 @@ Next: After running with reduced logs, gather traces for 'Chapter not found' and - Why: Make first-time contributors productive quickly and capture actionable decomposition plans for known monoliths. - Details: Add onboarding walkthrough (load → translate flow), refresh CONTRIBUTING references to match current code locations, and add draft decomposition plans for `services/epubService.ts`, `store/slices/imageSlice.ts`, and `services/navigationService.ts`. - Tests: N/A (docs only) + +2025-12-22 04:45 UTC - Refactor: isolate `epubService.ts` decomposition modules +- Files: services/epubService.ts; services/epubService/**; services/epub/types.ts; docs/WORKLOG.md +- Why: Decompose the `services/epubService.ts` monolith without breaking the existing `services/epub/*` pipeline/types; keep new modules <300 LOC. +- Tests: `npx tsc --noEmit`; `npm test -- --run tests/services/epubService.test.ts tests/epub/*.test.ts` diff --git a/services/epubService.ts b/services/epubService.ts index 1d07966..3a20312 100644 --- a/services/epubService.ts +++ b/services/epubService.ts @@ -1,1280 +1,54 @@ -import { SessionChapterData, AppSettings } from '../types'; -import JSZip from 'jszip'; -import { toStrictXhtml } from './translate/HtmlSanitizer'; - -// XHTML/XML namespaces used for strict XML serialization -const XHTML_NS = 'http://www.w3.org/1999/xhtml'; -const XML_NS = 'http://www.w3.org/XML/1998/namespace'; -const EPUB_NS = 'http://www.idpf.org/2007/ops'; -const XLINK_NS = 'http://www.w3.org/1999/xlink'; - -// Simplified XML Name validation (sufficient for XHTML attribute names) -const XML_NAME = /^[A-Za-z_][A-Za-z0-9._:-]*$/; - -// Basic bans for unsafe attributes -function isBannedAttr(name: string) { - return name.startsWith('on') || name === 'srcdoc'; -} - -// Very lightweight CSS sanitizer; keep as a single attribute -function sanitizeStyle(value: string) { - const v = (value ?? '').replace(/[\u0000-\u001F\u007F]/g, ''); - if (/url\s*\(\s*javascript:/i.test(v)) return ''; - if (/expression\s*\(/i.test(v)) return ''; - return v.trim(); -} - -function setAttrNS(el: Element, name: string, value: string) { - if (name === 'xml:lang') { el.setAttributeNS(XML_NS, name, value); return; } - if (name.startsWith('epub:')) { el.setAttributeNS(EPUB_NS, name, value); return; } - if (name.startsWith('xlink:')) { el.setAttributeNS(XLINK_NS, name, value); return; } - el.setAttribute(name, value); -} - -function copyAttributesSafely(srcEl: Element, dstEl: Element) { - for (const attr of Array.from(srcEl.attributes)) { - let name = attr.name; - let value = attr.value ?? ''; - - // Keep style as a single attribute; do not expand/split - if (name.toLowerCase() === 'style') { - const s = sanitizeStyle(value); - if (s) dstEl.setAttribute('style', s); - continue; - } - - // Drop unsafe attributes - if (isBannedAttr(name)) continue; - - // Validate XML name to avoid InvalidCharacterError (e.g., 'down;') - if (!XML_NAME.test(name)) { - try { console.warn('[EPUB XClone] Dropping invalid attribute', name, 'on <' + srcEl.tagName + '>'); } catch {} - continue; - } - - // reject unknown namespace prefixes (avoid unbound prefixes) - if (name.includes(':')) { - const [prefix] = name.split(':', 1); - const ok = prefix === 'xml' || prefix === 'epub' || prefix === 'xlink'; - if (!ok) continue; - } - - // Normalize non-namespaced names to lowercase - if (!name.includes(':')) name = name.toLowerCase(); - - try { - setAttrNS(dstEl, name, value); - } catch (e) { - try { - const snippet = (srcEl as any).outerHTML ? (srcEl as any).outerHTML.slice(0, 160).replace(/\s+/g, ' ') : `<${srcEl.tagName}>`; - console.warn('[EPUB XClone] Could not set attribute', name, 'value=', value, 'on', snippet, e); - } catch {} - // Continue without throwing - } - } -} - -// Clone an HTML node tree into an XHTML XMLDocument parent -function cloneIntoXhtml(srcNode: Node, xdoc: XMLDocument, dstParent: Element) { - switch (srcNode.nodeType) { - case Node.ELEMENT_NODE: { - const srcEl = srcNode as Element; - // Lowercase localName for XHTML consistency; guard invalid names - const name = srcEl.localName.toLowerCase(); - const isValidXmlLocalName = /^[A-Za-z_][A-Za-z0-9._-]*$/.test(name); - if (!isValidXmlLocalName) { - // Skip invalid element; clone its children directly into parent - for (const child of Array.from(srcEl.childNodes)) { - cloneIntoXhtml(child, xdoc, dstParent); - } - break; - } - const el = xdoc.createElementNS(XHTML_NS, name); - // Copy attributes safely (validated + namespaced) - copyAttributesSafely(srcEl, el); - // Ensure has alt for accessibility nicety - if (el.localName === 'img' && !el.hasAttribute('alt')) { - el.setAttribute('alt', ''); - } - // Avoid scripts in EPUB content - if (el.localName !== 'script') { - for (const child of Array.from(srcEl.childNodes)) { - cloneIntoXhtml(child, xdoc, el); - } - } - dstParent.appendChild(el); - break; - } - case Node.TEXT_NODE: { - dstParent.appendChild(xdoc.createTextNode((srcNode as Text).data)); - break; - } - // Omit comments/CDATA by default for chapters - default: - break; - } -} - -// Convert an HTML fragment string into serialized XHTML fragment -function htmlFragmentToXhtml(fragmentHtml: string): string { - // Repair common broken void tags like then the quote remains as text - fragmentHtml = fragmentHtml - .replace(/') - .replace(/') - .replace(/'); - // 1) Tolerant parse as HTML - const htmlDoc = new DOMParser().parseFromString(fragmentHtml, 'text/html'); - // 2) Create fresh XHTML document and a container - const xdoc = document.implementation.createDocument(XHTML_NS, 'html', null); - const htmlEl = xdoc.documentElement; - // Bind common namespaces used by EPUB content - htmlEl.setAttribute('xmlns:epub', EPUB_NS); - // Default language; may be overridden per element via xml:lang during cloning - if (!htmlEl.hasAttribute('xml:lang')) htmlEl.setAttributeNS(XML_NS, 'xml:lang', 'en'); - const body = xdoc.createElementNS(XHTML_NS, 'body'); - htmlEl.appendChild(body); - // 3) Clone children into XHTML body - for (const node of Array.from(htmlDoc.body.childNodes)) { - cloneIntoXhtml(node, xdoc, body); - } - // 4) Serialize children individually to avoid wrapping markup - const serializer = new XMLSerializer(); - const parts: string[] = []; - for (const child of Array.from(body.childNodes)) { - parts.push(serializer.serializeToString(child as any)); - } - let xhtml = parts.join(''); - // 5) Prefer numeric nbsp entity for max compatibility - xhtml = xhtml.replace(/\u00A0/g, ' '); - return xhtml; -} - -// Very small allowlist sanitizer for inline/basic block tags used in chapters -function sanitizeHtmlAllowlist(html: string): string { - const allowedTags = new Set([ - 'i','em','b','strong','u','s','br','sup','sub','a','p','ul','ol','li','span' - ]); - const doc = new DOMParser().parseFromString(html, 'text/html'); - const body = doc.body; - - const unwrapNode = (node: Element) => { - const parent = node.parentNode; - if (!parent) return; - while (node.firstChild) parent.insertBefore(node.firstChild, node); - parent.removeChild(node); - }; - - const isSafeHref = (href: string): boolean => { - try { - const url = new URL(href, 'https://example.com'); - const proto = (url.protocol || '').toLowerCase(); - return proto === 'http:' || proto === 'https:' || proto === 'mailto:'; - } catch { return false; } - }; - - const sanitizeEl = (el: Element) => { - // Copy array since we'll mutate children - for (const child of Array.from(el.childNodes)) { - if (child.nodeType === Node.COMMENT_NODE) { - el.removeChild(child); - continue; - } - if (child.nodeType === Node.ELEMENT_NODE) { - const c = child as Element; - const tag = c.tagName.toLowerCase(); - if (!allowedTags.has(tag)) { - // unwrap unknown element, keep its children - unwrapNode(c); - continue; - } - // Strip disallowed attributes - for (const attr of Array.from(c.attributes)) { - const name = attr.name.toLowerCase(); - const value = attr.value; - const isEvent = name.startsWith('on'); - if (isEvent || name === 'style') { c.removeAttribute(attr.name); continue; } - if (tag === 'a') { - if (name === 'href') { - if (!isSafeHref(value)) c.removeAttribute('href'); - continue; - } - if (name === 'title') continue; - // drop everything else on - c.removeAttribute(attr.name); - continue; - } - if (tag === 'span') { - // Keep our placeholders only - if (name === 'data-illu' || name === 'data-fn') continue; - c.removeAttribute(attr.name); - continue; - } - // For other allowed tags: drop all attributes - c.removeAttribute(attr.name); - } - sanitizeEl(c); - } - } - }; - sanitizeEl(body); - return body.innerHTML; -} - -// Replace newline characters in text nodes with
elements for display parity -function convertNewlinesToBrInElement(root: Element) { - const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT, null); - const textNodes: Text[] = []; - let node: Node | null; - while ((node = walker.nextNode())) { - const t = node as Text; - if (t.data.includes('\n')) textNodes.push(t); - } - for (const t of textNodes) { - const parts = t.data.split(/\n/); - const frag = document.createDocumentFragment(); - parts.forEach((part, idx) => { - if (part) frag.appendChild(document.createTextNode(part)); - if (idx < parts.length - 1) frag.appendChild(document.createElement('br')); - }); - t.parentNode?.replaceChild(frag, t); - } -} - -export interface ChapterForEpub { - title: string; - originalTitle?: string; - content: string; - originalUrl: string; - url?: string; - translatedTitle: string; - translatedContent?: string; - prevUrl?: string | null; - nextUrl?: string | null; - usageMetrics: { - totalTokens: number; - promptTokens: number; - completionTokens: number; - estimatedCost: number; - requestTime: number; - provider: string; - model: string; - }; - images: Array<{ - marker: string; - imageData: string; // base64 data URL - prompt: string; - }>; - footnotes?: Array<{ - marker: string; - text: string; - }>; -} - -export interface TranslationStats { - totalCost: number; - totalTime: number; - totalTokens: number; - chapterCount: number; - imageCount: number; - providerBreakdown: Record; - modelBreakdown: Record; -} - -export interface TelemetryInsights { - totalEvents: number; - sessionDurationMs: number; - navigation: { count: number; totalMs: number; averageMs: number }; - hydration: { count: number; totalMs: number; averageMs: number }; - chapterReady: { count: number; totalMs: number; averageMs: number }; - exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; -} - -export interface NovelConfig { - title: string; - author: string; - originalTitle?: string; - description?: string; - genre?: string; - language: string; - originalLanguage?: string; - coverImage?: string; // base64 or URL - seriesName?: string; - volumeNumber?: number; - isbn?: string; - publisher?: string; - translationNotes?: string; -} - -export interface EpubTemplate { - gratitudeMessage?: string; - projectDescription?: string; - githubUrl?: string; - additionalAcknowledgments?: string; - customFooter?: string; -} - -export interface EpubExportOptions { - title?: string; - author?: string; - description?: string; - chapters: ChapterForEpub[]; - settings: AppSettings; - template?: EpubTemplate; - novelConfig?: NovelConfig; - telemetryInsights?: TelemetryInsights; - includeTitlePage?: boolean; - includeStatsPage?: boolean; - customTemplate?: any; - manualConfig?: any; - chapterUrls?: string[]; -} - -/** - * Collects active version chapters from session data for EPUB export - * Uses activeVersion tracking to determine which translation to include - */ -export const collectActiveVersions = ( - sessionData: Record, - urlHistory: string[] -): ChapterForEpub[] => { - const chapters: ChapterForEpub[] = []; - - // Use urlHistory for ordering, but also include any chapters not in history - // First, process chapters in urlHistory order to maintain chronological sequence - const processedUrls = new Set(); - - // Add chapters from urlHistory first (in order) - for (const url of urlHistory) { - if (sessionData[url]?.chapter && sessionData[url]?.translationResult) { - processedUrls.add(url); - const data = sessionData[url]; - chapters.push(createChapterForEpub(data, url)); - } - } - - // Then add any remaining chapters not in urlHistory (sorted by URL for consistency) - const remainingUrls = Object.keys(sessionData) - .filter(url => !processedUrls.has(url)) - .sort(); - - for (const url of remainingUrls) { - const data = sessionData[url]; - if (!data?.chapter || !data?.translationResult) { - console.log(`[EPUBService] Skipping ${url} - missing chapter or translation result`); - continue; - } - - chapters.push(createChapterForEpub(data, url)); - } - - console.log(`[EPUBService] Prepared ${chapters.length} chapters for EPUB in chronological order`); - return chapters; -}; - -/** - * Creates a ChapterForEpub object from session data - */ -const createChapterForEpub = (data: any, url: string): ChapterForEpub => { - // Create default metrics for chapters missing usage data - let metrics = data.translationResult.usageMetrics; - - if (!metrics) { - console.warn(`[EPUBService] Chapter ${url} missing usageMetrics - using defaults for statistics`); - metrics = { - totalTokens: 0, - promptTokens: 0, - completionTokens: 0, - estimatedCost: 0, - requestTime: 0, - provider: 'Unknown', - model: 'Unknown' - }; - } else { - // Validate and fix invalid metrics values - const fixedMetrics = { - totalTokens: typeof metrics.totalTokens === 'number' && isFinite(metrics.totalTokens) ? metrics.totalTokens : 0, - promptTokens: typeof metrics.promptTokens === 'number' && isFinite(metrics.promptTokens) ? metrics.promptTokens : 0, - completionTokens: typeof metrics.completionTokens === 'number' && isFinite(metrics.completionTokens) ? metrics.completionTokens : 0, - estimatedCost: typeof metrics.estimatedCost === 'number' && isFinite(metrics.estimatedCost) ? metrics.estimatedCost : 0, - requestTime: typeof metrics.requestTime === 'number' && isFinite(metrics.requestTime) ? metrics.requestTime : 0, - provider: typeof metrics.provider === 'string' ? metrics.provider : 'Unknown', - model: typeof metrics.model === 'string' ? metrics.model : 'Unknown' - }; - - // Check if we had to fix any values - const hadInvalidData = Object.keys(metrics).some(key => - metrics[key] !== fixedMetrics[key] - ); - - if (hadInvalidData) { - console.warn(`[EPUBService] Chapter ${url} had invalid usageMetrics - fixed for statistics:`, { - original: metrics, - fixed: fixedMetrics - }); - } - - metrics = fixedMetrics; - } - - // Get images from translation result - const images = data.translationResult.suggestedIllustrations?.map(illust => ({ - marker: illust.placementMarker, - imageData: illust.url || '', // This should be base64 data from generation - prompt: illust.imagePrompt - })) || []; - - // Get footnotes from translation result - const footnotes = data.translationResult.footnotes?.map(footnote => ({ - marker: footnote.marker, - text: footnote.text - })) || []; - - const translatedContent = data.translationResult.translation || ''; - - return { - title: data.chapter.title, - originalTitle: data.chapter.originalTitle || data.chapter.title, - content: data.chapter.content, - originalUrl: url, - url, - translatedTitle: data.translationResult.translatedTitle, - translatedContent, - prevUrl: data.chapter.prevUrl ?? null, - nextUrl: data.chapter.nextUrl ?? null, - usageMetrics: { - totalTokens: metrics.totalTokens, - promptTokens: metrics.promptTokens, - completionTokens: metrics.completionTokens, - estimatedCost: metrics.estimatedCost, - requestTime: metrics.requestTime, - provider: metrics.provider, - model: metrics.model, - }, - images: images.filter(img => img.imageData), // Only include images with data - footnotes: footnotes - }; -}; - -/** - * Calculates comprehensive statistics from collected chapters - */ -export const calculateTranslationStats = (chapters: ChapterForEpub[]): TranslationStats => { - const stats: TranslationStats = { - totalCost: 0, - totalTime: 0, - totalTokens: 0, - chapterCount: chapters.length, - imageCount: 0, - providerBreakdown: {}, - modelBreakdown: {} - }; - - chapters.forEach(chapter => { - const metrics = chapter.usageMetrics; - - // Aggregate totals - stats.totalCost += metrics.estimatedCost; - stats.totalTime += metrics.requestTime; - stats.totalTokens += metrics.totalTokens; - stats.imageCount += chapter.images.length; - - // Provider breakdown - if (!stats.providerBreakdown[metrics.provider]) { - stats.providerBreakdown[metrics.provider] = { - chapters: 0, - cost: 0, - time: 0, - tokens: 0 - }; - } - const providerStats = stats.providerBreakdown[metrics.provider]; - providerStats.chapters += 1; - providerStats.cost += metrics.estimatedCost; - providerStats.time += metrics.requestTime; - providerStats.tokens += metrics.totalTokens; - - // Model breakdown - if (!stats.modelBreakdown[metrics.model]) { - stats.modelBreakdown[metrics.model] = { - chapters: 0, - cost: 0, - time: 0, - tokens: 0 - }; - } - const modelStats = stats.modelBreakdown[metrics.model]; - modelStats.chapters += 1; - modelStats.cost += metrics.estimatedCost; - modelStats.time += metrics.requestTime; - modelStats.tokens += metrics.totalTokens; - }); - - return stats; -}; - -/** - * Default template for EPUB metadata - * This template can be customized by users to personalize their EPUB exports - */ -export const getDefaultTemplate = ():EpubTemplate => ({ - gratitudeMessage: `This translation was made possible through the remarkable capabilities of modern AI language models. We express our deep gratitude to the teams behind these technologies who have made creative translation accessible to everyone.`, - - projectDescription: `This e-book was generated using LexiconForge, an open-source AI translation platform that enables high-quality, creative translations of literature. The platform supports multiple AI providers and allows for collaborative refinement of translations.`, - - githubUrl: 'https://github.com/anantham/LexiconForge', - - additionalAcknowledgments: `Special thanks to the original authors whose creative works inspire these translations, and to the open-source community that makes tools like this possible. Translation is an art that bridges cultures and languages, bringing stories to new audiences worldwide.`, - - customFooter: '' -}); - -/** - * Creates a customizable template - users can override any field - * Example usage: - * const myTemplate = createCustomTemplate({ - * gratitudeMessage: 'My custom gratitude message...', - * githubUrl: 'https://github.com/myuser/myproject' - * }); - */ -export const createCustomTemplate = (overrides: Partial): EpubTemplate => { - const def = getDefaultTemplate(); - const merge = (a: any, b: any): any => - Object.fromEntries(Object.keys({ ...a, ...b }).map(k => { - const av = (a as any)[k], bv = (b as any)[k]; - return [k, (av && typeof av === 'object' && bv && typeof bv === 'object') ? merge(av, bv) : (bv ?? av)]; - })); - return merge(def, overrides ?? {}); -}; - -/** - * Gets novel configuration based on URL or manual configuration - * This allows for novel-specific metadata like title, author, etc. - */ -export const getNovelConfig = (firstChapterUrl?: string, manualConfig?: Partial): NovelConfig => { - // Default configuration - const defaultConfig: NovelConfig = { - title: 'Translated Novel', - author: 'Unknown Author', - language: 'en', - originalLanguage: 'ja', - publisher: 'LexiconForge Community' - }; - - // Novel-specific configurations based on URL patterns - let novelSpecificConfig: Partial = {}; - - if (firstChapterUrl) { - if (firstChapterUrl.includes('kakuyomu.jp')) { - // Enhanced configuration based on Novel Updates data - novelSpecificConfig = { - title: 'The Reincarnation of the Strongest Exorcist in Another World', - author: 'Kosuzu Kiichi', - originalTitle: '最強陰陽師の異世界転生記 〜下僕の妖怪どもに比べてモンスターが弱すぎるんだが〜', - description: 'Haruyoshi, the strongest exorcist was on the verge of death after the betrayal of his companions. Hoping to be happy in the next life, he tried the secret technique of reincarnation and was sent to a different world! Born into a family of magicians, the magic he failed to inherit was nothing compared to his previous skills as an exorcist. "Who needs magic? I\'ll survive in this world with my old techniques!"', - genre: 'Action, Adventure, Fantasy, Harem, Romance', - originalLanguage: 'ja', - seriesName: 'The Reincarnation of the Strongest Exorcist', - volumeNumber: 1, - isbn: 'urn:uuid:strongest-exorcist-v1', - publisher: 'Futabasha (Original) / J-Novel Club (English)', - translationNotes: 'Translated from Japanese web novel published on Kakuyomu and Syosetu. Originally published in 2018 by Kosuzu Kiichi. Licensed by J-Novel Club for English publication. This is an AI-powered fan translation for educational and entertainment purposes.' - }; - } else if (firstChapterUrl.includes('booktoki468.com')) { - novelSpecificConfig = { - title: 'Dungeon Defense', - author: 'Yoo Heonhwa', - originalTitle: '던전 디펜스', - description: 'A dark fantasy novel about survival and strategy in a dungeon world where the protagonist must use cunning and manipulation to survive against overwhelming odds.', - genre: 'Dark Fantasy, Strategy, Psychological', - originalLanguage: 'ko', - seriesName: 'Dungeon Defense', - volumeNumber: 1, - isbn: 'urn:uuid:dungeon-defense-v1', - publisher: 'BookToki (Original)', - translationNotes: 'Translated from Korean web novel published on BookToki. Known for its complex psychological elements and strategic gameplay mechanics.' - }; - } else if (firstChapterUrl.includes('syosetu.com') || firstChapterUrl.includes('ncode.syosetu.com')) { - // Syosetu - Japanese web novel platform - novelSpecificConfig = { - title: 'Web Novel from Syosetu', - author: 'Unknown Syosetu Author', - originalTitle: '小説家になろう作品', - description: 'Japanese web novel from the popular Syosetu platform.', - genre: 'Web Novel, Japanese Literature', - originalLanguage: 'ja', - publisher: 'Syosetu (Original)', - translationNotes: 'Translated from Japanese web novel published on Syosetu (Shōsetsuka ni Narō).' - }; - } else if (firstChapterUrl.includes('novelupdates.com')) { - // Novel Updates - aggregator site - novelSpecificConfig = { - title: 'Novel from Novel Updates', - author: 'Unknown Author', - description: 'Novel sourced from Novel Updates database.', - genre: 'Various', - publisher: 'Novel Updates Community', - translationNotes: 'Novel information sourced from Novel Updates community database.' - }; - } - // Add more novel configurations as needed - } - - return { - ...defaultConfig, - ...novelSpecificConfig, - ...manualConfig - }; -}; - -/** - * Generates a professional title page using novel metadata - */ -const generateTitlePage = (novelConfig: NovelConfig, stats: TranslationStats): string => { - let titlePageHtml = `
-`; - - // Main title - titlePageHtml += `

${escapeXml(novelConfig.title)}

-`; - - // Original title (if different) - if (novelConfig.originalTitle && novelConfig.originalTitle !== novelConfig.title) { - titlePageHtml += `
${escapeXml(novelConfig.originalTitle)}
-`; - } - - // Author - titlePageHtml += `
by ${escapeXml(novelConfig.author)}
-`; - - // Metadata section - titlePageHtml += ` -`; // metadata - titlePageHtml += `
-`; // title-page - - return titlePageHtml; -}; - -/** - * Generates a comprehensive table of contents page with navigation links - */ -const generateTableOfContents = (chapters: ChapterForEpub[], includeStatsPage: boolean): string => { - let tocHtml = `

Table of Contents

\n\n`; - - tocHtml += `
-`; - tocHtml += `

This translation contains ${chapters.length} chapters

-`; - tocHtml += `
\n\n`; - - tocHtml += `
    -`; - - chapters.forEach((chapter, index) => { - const chapterTitle = chapter.translatedTitle || chapter.title || `Chapter ${index + 1}`; - const chapterHref = `chapter-${String(index + 1).padStart(4, '0')}.xhtml`; - - tocHtml += `
  1. -`; - tocHtml += ` ${escapeXml(chapterTitle)} -`; - tocHtml += `
    -`; - tocHtml += ` Translated with ${escapeXml(chapter.usageMetrics.provider)} ${escapeXml(chapter.usageMetrics.model)} -`; - if (chapter.images && chapter.images.length > 0) { - tocHtml += ` • ${chapter.images.length} illustration${chapter.images.length > 1 ? 's' : ''}`; - } - if (chapter.footnotes && chapter.footnotes.length > 0) { - tocHtml += ` • ${chapter.footnotes.length} footnote${chapter.footnotes.length > 1 ? 's' : ''}`; - } - tocHtml += `
    -`; - tocHtml += `
  2. -`; - }); - - // Optionally include special sections at the end - if (includeStatsPage) { - tocHtml += `
  3. -`; - tocHtml += ` Acknowledgments -`; - tocHtml += `
  4. -`; - } - tocHtml += `
-`; - - return tocHtml; -}; - -/** - * Generates a detailed statistics and acknowledgments page - */ -const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { - if (!telemetry) return ''; - - const formatMs = (ms: number): string => { - if (!Number.isFinite(ms)) return '—'; - if (ms < 1000) return `${ms.toFixed(0)} ms`; - const seconds = ms / 1000; - if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; - const minutes = seconds / 60; - if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; - const hours = minutes / 60; - return `${hours.toFixed(2)} h`; - }; - - const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { - if (!data || data.count === 0) return ''; - return ` - - ${label} - ${data.count} - ${formatMs(data.totalMs)} - ${formatMs(data.averageMs)} - `; - }; - - const rows = [ - renderRow('Navigation requests', telemetry.navigation), - renderRow('IndexedDB hydration', telemetry.hydration), - renderRow('Chapter ready-to-read', telemetry.chapterReady), - renderRow('JSON exports', telemetry.exports?.json), - renderRow('EPUB exports', telemetry.exports?.epub) - ].filter(Boolean).join(''); - - let html = `
-`; - html += `

Session Insights

-`; - html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

-`; - html += `
-`; - html += `
-`; - html += `
${telemetry.totalEvents.toLocaleString()}
-`; - html += `
Telemetry Events
-`; - html += `
-`; - html += `
-`; - html += `
${formatMs(telemetry.sessionDurationMs)}
-`; - html += `
Session Duration
-`; - html += `
-`; - html += `
-`; - - if (rows) { - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += rows; - html += ` -`; - html += `
ActivityOccurrencesTotal DurationAverage Duration
-`; - } - - html += `
- -`; - return html; -}; - -const generateStatsAndAcknowledgments = (stats: TranslationStats, template: EpubTemplate, telemetry?: TelemetryInsights): string => { - let html = `

Acknowledgments

\\n\\n`; - - // Project description - html += `
-`; - html += `

About This Translation

-`; - html += `

${escapeXml(template.projectDescription || '')}

-`; - if (template.githubUrl) { - html += `

Source Code: ${escapeXml(template.githubUrl)}

-`; - } - html += `
\n\n`; - - // Translation statistics - html += `
-`; - html += `

Translation Statistics

-`; - - html += `
-`; - html += `
-`; - html += `
${stats.chapterCount}
-`; - html += `
Chapters
-`; - html += `
-`; - html += `
-`; - html += `
$${stats.totalCost.toFixed(4)}
-`; - html += `
Total Cost
-`; - html += `
-`; - html += `
-`; - html += `
${Math.round(stats.totalTime)}s
-`; - html += `
Total Time
-`; - html += `
-`; - html += `
-`; - html += `
${stats.totalTokens.toLocaleString()}
-`; - html += `
Total Tokens
-`; - html += `
-`; - if (stats.imageCount > 0) { - html += `
-`; - html += `
${stats.imageCount}
-`; - html += `
Images Generated
-`; - html += `
-`; - } - html += `
-`; - html += `
\n\n`; - - html += renderTelemetryInsights(telemetry); - - // Provider breakdown - const providers = Object.keys(stats.providerBreakdown); - if (providers.length > 0) { - html += `
-`; - html += `

Translation Providers Used

-`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - - providers.forEach(provider => { - const providerStats = stats.providerBreakdown[provider]; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - }); - - html += ` -`; - html += `
ProviderChaptersCostTime
${escapeXml(provider)}${providerStats.chapters}$${providerStats.cost.toFixed(4)}${Math.round(providerStats.time)}s
-`; - html += `
\n\n`; - } - - // Model breakdown (top 10 most used) - const models = Object.entries(stats.modelBreakdown) - .sort(([,a], [,b]) => b.chapters - a.chapters) - .slice(0, 10); - - if (models.length > 0) { - html += `
-`; - html += `

AI Models Used

-`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - - models.forEach(([model, modelStats]) => { - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - }); - - html += ` -`; - html += `
ModelChaptersTokens
${escapeXml(model)}${modelStats.chapters}${modelStats.tokens.toLocaleString()}
-`; - html += `
\n\n`; - } - - // Gratitude message - html += `
-`; - html += `

Acknowledgments

-`; - html += `

${escapeXml(template.gratitudeMessage || '')}

-`; - if (template.additionalAcknowledgments) { - html += `

${escapeXml(template.additionalAcknowledgments)}

-`; - } - html += `
\n\n`; - - // Footer - if (template.customFooter) { - html += `
-`; - html += `${escapeXml(template.customFooter)} -`; - html += `
-`; - } - - html += `
-`; - html += `

Translation completed on ${new Date().toLocaleDateString()}

-`; - html += `
-`; - - return html; -}; - -/** - * Converts chapter content with illustrations and footnotes to XHTML suitable for EPUB - */ -const convertChapterToHtml = (chapter: ChapterForEpub): string => { - let htmlContent = chapter.translatedTitle ? - `

${escapeXml(chapter.translatedTitle)}

\n\n` : - `

${escapeXml(chapter.title)}

\n\n`; - - // Get the translated content, fallback to original if needed - let content = chapter.content; - - // Process content and embed images - if (chapter.images.length > 0) { - // Replace illustration markers with actual images - for (const image of chapter.images) { - const imgHtml = `
- \"${escapeXml(image.prompt)}\" -

${escapeXml(image.prompt)}

-
`; - - content = content.replace(image.marker, imgHtml); - } - } - - // Process and embed footnotes - if (chapter.footnotes && chapter.footnotes.length > 0) { - // Replace footnote markers with links - for (const footnote of chapter.footnotes) { - const footnoteLink = `[${footnote.marker}]`; - content = content.replace(`[${footnote.marker}]`, footnoteLink); - } - - // Add footnotes section at the end - let footnotesHtml = '
\n

Footnotes

\n
    \n'; - for (const footnote of chapter.footnotes) { - footnotesHtml += `
  1. -`; - footnotesHtml += ` ${escapeXml(footnote.text)} -`; - footnotesHtml += ` -`; - footnotesHtml += `
  2. \n`; - } - footnotesHtml += '
\n
\n'; - content += '\n' + footnotesHtml; - } - - // Convert content to proper XHTML paragraphs - content = convertToXhtmlParagraphs(content); - - htmlContent += content; - - return htmlContent; -}; - -/** - * Converts text content to proper XHTML paragraphs without invalid nesting - */ -const convertToXhtmlParagraphs = (content: string): string => { - // First, escape any remaining unescaped XML entities - content = content.replace(/&(?!(amp|lt|gt|quot|apos);)/g, '&'); - - // Split content by double newlines to create paragraphs - const paragraphs = content.split(/\n\s*\n/); - - let xhtmlContent = ''; - - for (let para of paragraphs) { - para = para.trim(); - if (!para) continue; - - // Check if this paragraph already contains block-level HTML elements - const hasBlockElements = /<(div|p|h[1-6]|ul|ol|li|blockquote|pre|hr|table|form|fieldset|address|center)[^>]*>/i.test(para); - - if (hasBlockElements) { - // Already has block elements, just add it as-is but fix line breaks - para = para.replace(/\n/g, ' '); // Convert single line breaks to spaces within block elements - xhtmlContent += para + '\n\n'; - } else { - // Regular text paragraph - wrap in

and convert line breaks to
- para = para.replace(/\n/g, '
'); // Use self-closing br tags for XHTML - xhtmlContent += `

${para}

\n\n`; - } - } - - return xhtmlContent.trim(); -}; - -/** - * Build chapter XHTML using DOM nodes (footnotes visible inline and at end) - */ -const buildChapterXhtml = (chapter: ChapterForEpub): string => { - const root = document.createElement('div'); - // Title - const h1 = document.createElement('h1'); - h1.textContent = chapter.translatedTitle || chapter.title; - root.appendChild(h1); - - // 1) Inject placeholders for markers - const withIllu = chapter.content.replace(/\[(ILLUSTRATION-\d+[A-Za-z]*) \]/g, (_m, marker) => { - return ``; - }); - const withPlaceholders = withIllu.replace(/\[(\d+)\]/g, (_m, n) => ``); - - // 2) Sanitize with tight allowlist to preserve inline tags safely - const sanitized = sanitizeHtmlAllowlist(withPlaceholders); - - // 3) Materialize into a working container and normalize newlines to
- const container = document.createElement('div'); - container.innerHTML = sanitized; - convertNewlinesToBrInElement(container); - - // 4) Replace placeholders with generated illustration blocks and footnote refs - const imagesByMarker = new Map( - chapter.images.map(i => [i.marker, i]) - ); - for (const span of Array.from(container.querySelectorAll('span[data-illu]'))) { - const marker = (span as HTMLElement).getAttribute('data-illu') || ''; - const img = imagesByMarker.get(`[${marker}]`) || imagesByMarker.get(marker); - if (img) { - const wrap = document.createElement('div'); - wrap.setAttribute('class', 'illustration'); - const im = document.createElement('img'); - im.setAttribute('src', img.imageData); - im.setAttribute('alt', img.prompt); - im.setAttribute('style', 'max-width: 100%; height: auto; display: block; margin: 1em auto;'); - const cap = document.createElement('p'); - cap.setAttribute('class', 'illustration-caption'); - cap.setAttribute('style', 'text-align: center; font-style: italic; color: #666; font-size: 0.9em; margin-top: 0.5em;'); - cap.textContent = img.prompt; - wrap.appendChild(im); - wrap.appendChild(cap); - span.replaceWith(wrap); - } else { - // If missing, remove placeholder - span.remove(); - } - } - for (const span of Array.from(container.querySelectorAll('span[data-fn]'))) { - const num = (span as HTMLElement).getAttribute('data-fn') || ''; - const sup = document.createElement('sup'); - const a = document.createElement('a'); - a.setAttribute('href', `#fn${num}`); - a.setAttribute('class', 'footnote-ref'); - a.setAttribute('id', `fnref${num}`); - a.setAttribute('epub:type', 'noteref'); - a.textContent = `[${num}]`; - sup.appendChild(a); - span.replaceWith(sup); - } - - // 5) Append sanitized content under title - while (container.firstChild) root.appendChild(container.firstChild); - - // 6) Footnotes section at end - if (chapter.footnotes && chapter.footnotes.length > 0) { - const div = document.createElement('div'); - div.setAttribute('class', 'footnotes'); - const h3 = document.createElement('h3'); - h3.textContent = 'Footnotes'; - const ol = document.createElement('ol'); - div.appendChild(h3); - div.appendChild(ol); - for (const fn of chapter.footnotes) { - const num = String(fn.marker).replace(/^\ \[|\]$/g, ''); - const li = document.createElement('li'); - li.setAttribute('id', `fn${num}`); - li.setAttribute('epub:type', 'footnote'); - - // Allow limited inline HTML inside footnotes (e.g., , ,
) - try { - const safeHtml = sanitizeHtmlAllowlist(fn.text || ''); - if (safeHtml) { - const temp = document.createElement('div'); - temp.innerHTML = safeHtml; - while (temp.firstChild) li.appendChild(temp.firstChild); - li.appendChild(document.createTextNode(' ')); - } else { - li.appendChild(document.createTextNode((fn.text || '') + ' ')); - } - } catch { - li.appendChild(document.createTextNode((fn.text || '') + ' ')); - } - - const back = document.createElement('a'); - back.setAttribute('href', `#fnref${num}`); - back.setAttribute('class', 'footnote-backref'); - back.setAttribute('epub:type', 'backlink'); - back.textContent = '↩'; - li.appendChild(back); - ol.appendChild(li); - } - root.appendChild(div); - } - - // 7) XHTML serialization - return htmlFragmentToXhtml(toStrictXhtml(root.innerHTML)); -}; -/** - * Escape HTML characters to prevent XSS and formatting issues (kept for backward compatibility) - */ -const escapeHtml = (text: string): string => { - const div = document.createElement('div'); - div.textContent = text; - return div.innerHTML; -}; +import { + ChapterForEpub, + EpubExportOptions, + TranslationStats, + TelemetryInsights, + NovelConfig, + EpubTemplate, + EpubChapter, + EpubMeta +} from './epubService/types'; +import { + getDefaultTemplate, + createCustomTemplate +} from './epubService/templates/defaults'; +import { getNovelConfig } from './epubService/templates/novelConfig'; +import { calculateTranslationStats } from './epubService/data/stats'; +import { collectActiveVersions } from './epubService/data/collector'; +import { generateTitlePage } from './epubService/generators/titlePage'; +import { generateTableOfContents } from './epubService/generators/toc'; +import { + renderTelemetryInsights, + generateStatsAndAcknowledgments +} from './epubService/generators/statsPage'; +import { + buildChapterXhtml, +} from './epubService/generators/chapter'; +import { htmlFragmentToXhtml } from './epubService/sanitizers/xhtmlSanitizer'; +import { generateEpub3WithJSZip } from './epubService/packagers/epubPackager'; + +// Re-export types for consumers +export type { + ChapterForEpub, + TranslationStats, + TelemetryInsights, + NovelConfig, + EpubTemplate, + EpubExportOptions, + EpubChapter, + EpubMeta +}; + +export { + getDefaultTemplate, + createCustomTemplate, + getNovelConfig, + calculateTranslationStats, + collectActiveVersions +}; + +// Re-export specific generators if needed by tests +export { renderTelemetryInsights }; /** * Generates and downloads an EPUB file from the collected chapters using JSZip (browser-compatible) @@ -1313,6 +87,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => const includeStats = (options as any).includeStatsPage !== false; const tableOfContents = generateTableOfContents(options.chapters, includeStats); const statsAndAcknowledgments = generateStatsAndAcknowledgments(stats, template, options.telemetryInsights); + // Ensure special pages are XHTML-safe const titlePageXhtml = htmlFragmentToXhtml(titlePage); const tocXhtml = htmlFragmentToXhtml(tableOfContents); @@ -1324,6 +99,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => chapters.push({ id: 'title-page', title: 'Title Page', xhtml: titlePageXhtml, href: 'title.xhtml' }); } chapters.push({ id: 'toc-page', title: 'Table of Contents', xhtml: tocXhtml, href: 'toc.xhtml' }); + options.chapters.forEach((chapter, index) => { chapters.push({ id: `ch-${String(index + 1).padStart(3, '0')}`, @@ -1332,6 +108,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => href: `chapter-${String(index + 1).padStart(4, '0')}.xhtml` }); }); + if (includeStats) { chapters.push({ id: 'stats-page', title: 'Acknowledgments', xhtml: statsXhtml, href: 'stats.xhtml' }); } @@ -1391,387 +168,3 @@ export const generateEpub = async (options: EpubExportOptions): Promise => throw new Error(`EPUB generation failed: ${error}`); } }; - -// JSZip-based EPUB3 generation types and functions -export interface EpubChapter { - id: string; - title: string; - xhtml: string; - href: string; -} - -export interface EpubMeta { - title: string; - author: string; - description?: string; - language?: string; - identifier?: string; - publisher?: string; -} - -/** - * Generates EPUB3-compliant ZIP file using JSZip (browser-compatible) - */ -const generateEpub3WithJSZip = async (meta: EpubMeta, chapters: EpubChapter[]): Promise => { - const lang = meta.language || 'en'; - const bookId = meta.identifier || `urn:uuid:${crypto.randomUUID()}`; - - // EPUB3 directory structure - const oebps = 'OEBPS'; - const textDir = `${oebps}/text`; - const stylesDir = `${oebps}/styles`; - const imagesDir = `${oebps}/images`; - - // Helper to wrap content in XHTML - const xhtmlWrap = (title: string, body: string) => ` - - - - - ${escapeXml(title)} - - - - ${body} - -`; - - // Generate navigation document (EPUB3 requirement) - const navXhtml = ` - - - - - Table of Contents - - - - - -`; - - // Generate manifest items for content.opf - const manifestItems = chapters.map(ch => - `` - ).join('\n '); - - // Generate spine items for content.opf - const spineItems = chapters.map(ch => - `` - ).join('\n '); - - // Content.opf (package document) - const contentOpf = ` - - - ${escapeXml(bookId)} - ${escapeXml(meta.title)} - ${lang} - ${meta.author ? `${escapeXml(meta.author)}` : ''} - ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} - ${meta.description ? `${escapeXml(meta.description)}` : ''} - ${new Date().toISOString()} - - - - - ${manifestItems} - - - ${spineItems} - -`; - - // Container.xml (required EPUB metadata) - const containerXml = ` - - - - -`; - - // Professional CSS styling (preserved from original) - const stylesheet = ` -body { - font-family: Georgia, serif; - line-height: 1.6; - max-width: 42em; - margin: 0 auto; - padding: 1.5em; - color: #333; -} -h1 { - color: #2c3e50; - border-bottom: 2px solid #3498db; - padding-bottom: 0.5em; - margin-bottom: 1em; - font-weight: bold; -} -h2 { - color: #27ae60; - border-bottom: 1px solid #27ae60; - padding-bottom: 0.3em; - margin-top: 2em; - margin-bottom: 1em; -} -h3 { - color: #8e44ad; - margin-top: 1.5em; - margin-bottom: 0.75em; -} -p { - margin: 1em 0; - text-align: justify; - text-indent: 1.5em; -} -.illustration { - page-break-inside: avoid; - margin: 2em 0; - text-align: center; -} -.illustration img { - max-width: 100%; - height: auto; - border: 1px solid #ddd; - border-radius: 4px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); -} -.illustration-caption { - font-style: italic; - color: #666; - text-align: center; - font-size: 0.9em; - margin-top: 0.5em; - text-indent: 0; -} -table { - width: 100%; - border-collapse: collapse; - margin: 1em 0; - font-size: 0.9em; -} -th, td { - border: 1px solid #ddd; - padding: 0.75em; - text-align: left; -} -th { - background-color: #f8f9fa; - font-weight: bold; -} -ol, ul { - margin: 1em 0; - padding-left: 2em; -} -li { - margin-bottom: 0.5em; - line-height: 1.5; -} -.gratitude-section { - background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); - color: white; - padding: 2em; - border-radius: 12px; - margin: 3em 0; -} -.gratitude-section h2 { - color: white; - border-bottom: 1px solid rgba(255,255,255,0.3); - text-align: center; -} -.gratitude-section p { - text-indent: 0; -} -/* Footnotes styling */ -.footnotes { - margin-top: 3em; - padding-top: 2em; - border-top: 1px solid #ddd; -} -.footnotes h3 { - color: #666; - font-size: 1.1em; - margin-bottom: 1em; -} -.footnotes ol { - font-size: 0.9em; - line-height: 1.4; -} -.footnotes li { - margin-bottom: 0.75em; -} -.footnote-ref { - font-size: 0.8em; - vertical-align: super; - text-decoration: none; - color: #007bff; - font-weight: bold; -} -.footnote-backref { - margin-left: 0.5em; - font-size: 0.8em; - text-decoration: none; - color: #007bff; -} -.footnote-ref:hover, .footnote-backref:hover { - text-decoration: underline; -} -/* Title page specific styling */ -.title-page { - text-align: center; - padding: 4em 2em; - page-break-after: always; -} -.title-page h1 { - font-size: 3em; - margin-bottom: 0.5em; - color: #2c3e50; - border: none; - padding: 0; -} -.title-page .subtitle { - font-size: 1.5em; - color: #7f8c8d; - font-style: italic; - margin-bottom: 2em; -} -.title-page .author { - font-size: 1.25em; - color: #34495e; - margin-bottom: 1em; -} -.title-page .metadata { - margin-top: 3em; - font-size: 0.9em; - color: #666; - line-height: 1.6; -} -.title-page .metadata p { - text-indent: 0; - margin: 0.5em 0; -}`; - - // Extract data:image payloads from chapter XHTML and rewrite to packaged image files - type ImgEntry = { href: string; mediaType: string; base64: string; id: string }; - const processedChapters: { ch: EpubChapter; xhtml: string }[] = []; - const imageEntries: ImgEntry[] = []; - let imgIndex = 1; - const dataImgRegex = /(]*?src=")(data:(image\/[A-Za-z0-9.+-]+);base64,([A-Za-z0-9+/=]+))(\"[^>]*>)/g; - - for (const ch of chapters) { - let xhtml = ch.xhtml; - xhtml = xhtml.replace(dataImgRegex, (_m, p1, _src, mime, b64, p5) => { - const ext = mime.endsWith('jpeg') ? 'jpg' : (mime.split('/')[1] || 'png'); - const filename = `img-${String(imgIndex).padStart(4, '0')}.${ext}`; - const href = `images/${filename}`; - const id = `img${imgIndex}`; - imageEntries.push({ href, mediaType: mime, base64: b64, id }); - imgIndex++; - return `${p1}../${href}${p5}`; - }); - processedChapters.push({ ch, xhtml }); - } - - // Build manifest and spine including images - const manifestItemsText = processedChapters.map(({ ch }) => - `` - ).join('\n '); - const manifestItemsImages = imageEntries.map(img => - `` - ).join('\n '); - const spineItems2 = processedChapters.map(({ ch }) => ``).join('\n '); - - const contentOpf2 = ` - - - ${escapeXml(bookId)} - ${escapeXml(meta.title)} - ${lang} - ${meta.author ? `${escapeXml(meta.author)}` : ''} - ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} - ${meta.description ? `${escapeXml(meta.description)}` : ''} - ${new Date().toISOString()} - - - - - ${manifestItemsText} - ${manifestItemsImages ? `\n ${manifestItemsImages}` : ''} - - - ${spineItems2} - -`; - - // Create ZIP with JSZip - const zip = new JSZip(); - - // Add mimetype (must be first and uncompressed) - zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' }); - - // Add META-INF - zip.file('META-INF/container.xml', containerXml); - - // Add OEBPS content - zip.file(`${oebps}/content.opf`, contentOpf2); - zip.file(`${textDir}/nav.xhtml`, navXhtml); - zip.file(`${stylesDir}/stylesheet.css`, stylesheet); - - // Add processed chapter files and extracted images (with optional strict XML parse diagnostics) - const parseErrors: string[] = []; - for (const { ch, xhtml } of processedChapters) { - const wrapped = xhtmlWrap(ch.title, xhtml); - try { - const parser = new DOMParser(); - const doc = parser.parseFromString(wrapped, 'application/xhtml+xml'); - const hasError = - doc.getElementsByTagName('parsererror').length > 0 || - doc.getElementsByTagNameNS('*', 'parsererror').length > 0; - if (hasError) { - const txt = doc.documentElement.textContent || ''; - const msg = `[ParseError] ${ch.href}: ${txt.slice(0, 300)}`; - console.warn(msg); - parseErrors.push(msg); - } - } catch {} - zip.file(`${textDir}/${ch.href}`, wrapped); - } - for (const img of imageEntries) { - zip.file(`${oebps}/${img.href}`, img.base64, { base64: true }); - } - - // Attach diagnostics when parse errors are detected - if (parseErrors.length > 0) { - zip.file(`${oebps}/debug/parse-errors.txt`, parseErrors.join('\n')); - processedChapters.forEach(({ ch, xhtml }) => { - zip.file(`${oebps}/debug/text/${ch.href}.raw.xhtml`, xhtml); - }); - } - - // Generate and return ArrayBuffer - return await zip.generateAsync({ - type: 'arraybuffer', - mimeType: 'application/epub+zip' - }); -}; - -/** - * Escapes XML characters to prevent formatting issues - */ -const escapeXml = (text: string): string => { - return text - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); -}; - -// toStrictXhtml is imported from services/translate/HtmlSanitizer - -// sanitizeHtml function removed - unused in epubService diff --git a/services/epubService/data/collector.ts b/services/epubService/data/collector.ts new file mode 100644 index 0000000..7a27e09 --- /dev/null +++ b/services/epubService/data/collector.ts @@ -0,0 +1,128 @@ +import { SessionChapterData } from '../../../types'; +import { ChapterForEpub } from '../types'; + +/** + * Creates a ChapterForEpub object from session data + */ +export const createChapterForEpub = (data: any, url: string): ChapterForEpub => { + // Create default metrics for chapters missing usage data + let metrics = data.translationResult.usageMetrics; + + if (!metrics) { + console.warn(`[EPUBService] Chapter ${url} missing usageMetrics - using defaults for statistics`); + metrics = { + totalTokens: 0, + promptTokens: 0, + completionTokens: 0, + estimatedCost: 0, + requestTime: 0, + provider: 'Unknown', + model: 'Unknown' + }; + } else { + // Validate and fix invalid metrics values + const fixedMetrics = { + totalTokens: typeof metrics.totalTokens === 'number' && isFinite(metrics.totalTokens) ? metrics.totalTokens : 0, + promptTokens: typeof metrics.promptTokens === 'number' && isFinite(metrics.promptTokens) ? metrics.promptTokens : 0, + completionTokens: typeof metrics.completionTokens === 'number' && isFinite(metrics.completionTokens) ? metrics.completionTokens : 0, + estimatedCost: typeof metrics.estimatedCost === 'number' && isFinite(metrics.estimatedCost) ? metrics.estimatedCost : 0, + requestTime: typeof metrics.requestTime === 'number' && isFinite(metrics.requestTime) ? metrics.requestTime : 0, + provider: typeof metrics.provider === 'string' ? metrics.provider : 'Unknown', + model: typeof metrics.model === 'string' ? metrics.model : 'Unknown' + }; + + // Check if we had to fix any values + const hadInvalidData = Object.keys(metrics).some(key => + metrics[key] !== fixedMetrics[key] + ); + + if (hadInvalidData) { + console.warn(`[EPUBService] Chapter ${url} had invalid usageMetrics - fixed for statistics:`, { + original: metrics, + fixed: fixedMetrics + }); + } + + metrics = fixedMetrics; + } + + // Get images from translation result + const images = data.translationResult.suggestedIllustrations?.map((illust: any) => ({ + marker: illust.placementMarker, + imageData: illust.url || '', // This should be base64 data from generation + prompt: illust.imagePrompt + })) || []; + + // Get footnotes from translation result + const footnotes = data.translationResult.footnotes?.map((footnote: any) => ({ + marker: footnote.marker, + text: footnote.text + })) || []; + + const translatedContent = data.translationResult.translation || ''; + + return { + title: data.chapter.title, + originalTitle: data.chapter.originalTitle || data.chapter.title, + content: data.chapter.content, + originalUrl: url, + url, + translatedTitle: data.translationResult.translatedTitle, + translatedContent, + prevUrl: data.chapter.prevUrl ?? null, + nextUrl: data.chapter.nextUrl ?? null, + usageMetrics: { + totalTokens: metrics.totalTokens, + promptTokens: metrics.promptTokens, + completionTokens: metrics.completionTokens, + estimatedCost: metrics.estimatedCost, + requestTime: metrics.requestTime, + provider: metrics.provider, + model: metrics.model, + }, + images: images.filter((img: any) => img.imageData), // Only include images with data + footnotes: footnotes + }; +}; + +/** + * Collects active version chapters from session data for EPUB export + * Uses activeVersion tracking to determine which translation to include + */ +export const collectActiveVersions = ( + sessionData: Record, + urlHistory: string[] +): ChapterForEpub[] => { + const chapters: ChapterForEpub[] = []; + + // Use urlHistory for ordering, but also include any chapters not in history + // First, process chapters in urlHistory order to maintain chronological sequence + const processedUrls = new Set(); + + // Add chapters from urlHistory first (in order) + for (const url of urlHistory) { + if (sessionData[url]?.chapter && sessionData[url]?.translationResult) { + processedUrls.add(url); + const data = sessionData[url]; + chapters.push(createChapterForEpub(data, url)); + } + } + + // Then add any remaining chapters not in urlHistory (sorted by URL for consistency) + const remainingUrls = Object.keys(sessionData) + .filter(url => !processedUrls.has(url)) + .sort(); + + for (const url of remainingUrls) { + const data = sessionData[url]; + if (!data?.chapter || !data?.translationResult) { + console.log(`[EPUBService] Skipping ${url} - missing chapter or translation result`); + continue; + } + + chapters.push(createChapterForEpub(data, url)); + } + + console.log(`[EPUBService] Prepared ${chapters.length} chapters for EPUB in chronological order`); + return chapters; +}; diff --git a/services/epubService/data/stats.ts b/services/epubService/data/stats.ts new file mode 100644 index 0000000..b24b5c5 --- /dev/null +++ b/services/epubService/data/stats.ts @@ -0,0 +1,58 @@ +import { ChapterForEpub, TranslationStats } from '../types'; + +/** + * Calculates comprehensive statistics from collected chapters + */ +export const calculateTranslationStats = (chapters: ChapterForEpub[]): TranslationStats => { + const stats: TranslationStats = { + totalCost: 0, + totalTime: 0, + totalTokens: 0, + chapterCount: chapters.length, + imageCount: 0, + providerBreakdown: {}, + modelBreakdown: {} + }; + + chapters.forEach(chapter => { + const metrics = chapter.usageMetrics; + + // Aggregate totals + stats.totalCost += metrics.estimatedCost; + stats.totalTime += metrics.requestTime; + stats.totalTokens += metrics.totalTokens; + stats.imageCount += chapter.images.length; + + // Provider breakdown + if (!stats.providerBreakdown[metrics.provider]) { + stats.providerBreakdown[metrics.provider] = { + chapters: 0, + cost: 0, + time: 0, + tokens: 0 + }; + } + const providerStats = stats.providerBreakdown[metrics.provider]; + providerStats.chapters += 1; + providerStats.cost += metrics.estimatedCost; + providerStats.time += metrics.requestTime; + providerStats.tokens += metrics.totalTokens; + + // Model breakdown + if (!stats.modelBreakdown[metrics.model]) { + stats.modelBreakdown[metrics.model] = { + chapters: 0, + cost: 0, + time: 0, + tokens: 0 + }; + } + const modelStats = stats.modelBreakdown[metrics.model]; + modelStats.chapters += 1; + modelStats.cost += metrics.estimatedCost; + modelStats.time += metrics.requestTime; + modelStats.tokens += metrics.totalTokens; + }); + + return stats; +}; diff --git a/services/epubService/generators/chapter.ts b/services/epubService/generators/chapter.ts new file mode 100644 index 0000000..13abc4d --- /dev/null +++ b/services/epubService/generators/chapter.ts @@ -0,0 +1,212 @@ +import { ChapterForEpub } from '../types'; +import { + sanitizeHtmlAllowlist, + toStrictXhtml, + convertNewlinesToBrInElement, + htmlFragmentToXhtml, + escapeXml +} from '../sanitizers/xhtmlSanitizer'; + +/** + * Converts chapter content with illustrations and footnotes to XHTML suitable for EPUB + */ +export const convertChapterToHtml = (chapter: ChapterForEpub): string => { + let htmlContent = chapter.translatedTitle ? + `

${escapeXml(chapter.translatedTitle)}

\n\n` : + `

${escapeXml(chapter.title)}

\n\n`; + + // Get the translated content, fallback to original if needed + let content = chapter.content; + + // Process content and embed images + if (chapter.images.length > 0) { + // Replace illustration markers with actual images + for (const image of chapter.images) { + const imgHtml = `
+ ${escapeXml(image.prompt)} +

${escapeXml(image.prompt)}

+
`; + + content = content.replace(image.marker, imgHtml); + } + } + + // Process and embed footnotes + if (chapter.footnotes && chapter.footnotes.length > 0) { + // Replace footnote markers with links + for (const footnote of chapter.footnotes) { + const footnoteLink = `[${footnote.marker}]`; + content = content.replace(`[${footnote.marker}]`, footnoteLink); + } + + // Add footnotes section at the end + let footnotesHtml = `
+

Footnotes

+
    +`; + for (const footnote of chapter.footnotes) { + footnotesHtml += `
  1. +`; + footnotesHtml += ` ${escapeXml(footnote.text)} +`; + footnotesHtml += ` +`; + footnotesHtml += `
  2. \n`; + } + footnotesHtml += `
+
+`; + content += '\n' + footnotesHtml; + } + + // Convert content to proper XHTML paragraphs + content = convertToXhtmlParagraphs(content); + + htmlContent += content; + + return htmlContent; +}; + +/** + * Converts text content to proper XHTML paragraphs without invalid nesting + */ +export const convertToXhtmlParagraphs = (content: string): string => { + // First, escape any remaining unescaped XML entities + content = content.replace(/&(?!(amp|lt|gt|quot|apos);)/g, '&'); + + // Split content by double newlines to create paragraphs + const paragraphs = content.split(/\n\s*\n/); + + let xhtmlContent = ''; + + for (let para of paragraphs) { + para = para.trim(); + if (!para) continue; + + // Check if this paragraph already contains block-level HTML elements + const hasBlockElements = /<(div|p|h[1-6]|ul|ol|li|blockquote|pre|hr|table|form|fieldset|address|center)[^>]*>/i.test(para); + + if (hasBlockElements) { + // Already has block elements, just add it as-is but fix line breaks + para = para.replace(/\n/g, ' '); // Convert single line breaks to spaces within block elements + xhtmlContent += para + '\n\n'; + } else { + // Regular text paragraph - wrap in

and convert line breaks to
+ para = para.replace(/\n/g, '
'); // Use self-closing br tags for XHTML + xhtmlContent += `

${para}

\n\n`; + } + } + + return xhtmlContent.trim(); +}; + +/** + * Build chapter XHTML using DOM nodes (footnotes visible inline and at end) + */ +export const buildChapterXhtml = (chapter: ChapterForEpub): string => { + const root = document.createElement('div'); + // Title + const h1 = document.createElement('h1'); + h1.textContent = chapter.translatedTitle || chapter.title; + root.appendChild(h1); + + // 1) Inject placeholders for markers + const withIllu = chapter.content.replace(/\b(ILLUSTRATION-\d+[A-Za-z]*)\b/g, (_m, marker) => { + return ``; + }); + const withPlaceholders = withIllu.replace(/\((\d+)\)/g, (_m, n) => ``); + + // 2) Sanitize with tight allowlist to preserve inline tags safely + const sanitized = sanitizeHtmlAllowlist(withPlaceholders); + + // 3) Materialize into a working container and normalize newlines to
+ const container = document.createElement('div'); + container.innerHTML = sanitized; + convertNewlinesToBrInElement(container); + + // 4) Replace placeholders with generated illustration blocks and footnote refs + const imagesByMarker = new Map( + chapter.images.map(i => [i.marker, i]) + ); + for (const span of Array.from(container.querySelectorAll('span[data-illu]'))) { + const marker = (span as HTMLElement).getAttribute('data-illu') || ''; + const img = imagesByMarker.get(`[${marker}]`) || imagesByMarker.get(marker); + if (img) { + const wrap = document.createElement('div'); + wrap.setAttribute('class', 'illustration'); + const im = document.createElement('img'); + im.setAttribute('src', img.imageData); + im.setAttribute('alt', img.prompt); + im.setAttribute('style', 'max-width: 100%; height: auto; display: block; margin: 1em auto;'); + const cap = document.createElement('p'); + cap.setAttribute('class', 'illustration-caption'); + cap.setAttribute('style', 'text-align: center; font-style: italic; color: #666; font-size: 0.9em; margin-top: 0.5em;'); + cap.textContent = img.prompt; + wrap.appendChild(im); + wrap.appendChild(cap); + span.replaceWith(wrap); + } else { + // If missing, remove placeholder + span.remove(); + } + } + for (const span of Array.from(container.querySelectorAll('span[data-fn]'))) { + const num = (span as HTMLElement).getAttribute('data-fn') || ''; + const sup = document.createElement('sup'); + const a = document.createElement('a'); + a.setAttribute('href', `#fn${num}`); + a.setAttribute('class', 'footnote-ref'); + a.setAttribute('id', `fnref${num}`); + a.setAttribute('epub:type', 'noteref'); + a.textContent = `[${num}]`; + sup.appendChild(a); + span.replaceWith(sup); + } + + // 5) Append sanitized content under title + while (container.firstChild) root.appendChild(container.firstChild); + + // 6) Footnotes section at end + if (chapter.footnotes && chapter.footnotes.length > 0) { + const div = document.createElement('div'); + div.setAttribute('class', 'footnotes'); + const h3 = document.createElement('h3'); + h3.textContent = 'Footnotes'; + const ol = document.createElement('ol'); + div.appendChild(h3); + div.appendChild(ol); + for (const fn of chapter.footnotes) { + const num = String(fn.marker).replace(/^\ \[|\ \]$/g, ''); + const li = document.createElement('li'); + li.setAttribute('id', `fn${num}`); + li.setAttribute('epub:type', 'footnote'); + + // Allow limited inline HTML inside footnotes (e.g., , ,
) + try { + const safeHtml = sanitizeHtmlAllowlist(fn.text || ''); + if (safeHtml) { + const temp = document.createElement('div'); + temp.innerHTML = safeHtml; + while (temp.firstChild) li.appendChild(temp.firstChild); + li.appendChild(document.createTextNode(' ')); + } else { + li.appendChild(document.createTextNode((fn.text || '') + ' ')); + } + } catch { + li.appendChild(document.createTextNode((fn.text || '') + ' ')); + } + + const back = document.createElement('a'); + back.setAttribute('href', `#fnref${num}`); + back.setAttribute('class', 'footnote-backref'); + back.setAttribute('epub:type', 'backlink'); + back.textContent = '↩'; + li.appendChild(back); + ol.appendChild(li); + } + root.appendChild(div); + } + + // 7) XHTML serialization + return htmlFragmentToXhtml(toStrictXhtml(root.innerHTML)); +}; diff --git a/services/epubService/generators/statsPage.ts b/services/epubService/generators/statsPage.ts new file mode 100644 index 0000000..2606789 --- /dev/null +++ b/services/epubService/generators/statsPage.ts @@ -0,0 +1,213 @@ +import { TranslationStats, EpubTemplate, TelemetryInsights } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; +import { renderTelemetryInsights } from './telemetryInsights'; + +export { renderTelemetryInsights } from './telemetryInsights'; + +/** + * Generates a detailed statistics and acknowledgments page + */ +export const generateStatsAndAcknowledgments = (stats: TranslationStats, template: EpubTemplate, telemetry?: TelemetryInsights): string => { + let html = `

Acknowledgments

\n\n`; + + // Project description + html += `
+`; + html += `

About This Translation

+`; + html += `

${escapeXml(template.projectDescription || '')}

+`; + if (template.githubUrl) { + html += `

Source Code: ${escapeXml(template.githubUrl)}

+`; + } + html += `
\n\n`; + + // Translation statistics + html += `
+`; + html += `

Translation Statistics

+`; + + html += `
+`; + html += `
+`; + html += `
${stats.chapterCount}
+`; + html += `
Chapters
+`; + html += `
+`; + html += `
+`; + html += `
$${stats.totalCost.toFixed(4)}
+`; + html += `
Total Cost
+`; + html += `
+`; + html += `
+`; + html += `
${Math.round(stats.totalTime)}s
+`; + html += `
Total Time
+`; + html += `
+`; + html += `
+`; + html += `
${stats.totalTokens.toLocaleString()}
+`; + html += `
Total Tokens
+`; + html += `
+`; + if (stats.imageCount > 0) { + html += `
+`; + html += `
${stats.imageCount}
+`; + html += `
Images Generated
+`; + html += `
+`; + } + html += `
+`; + html += `
\n\n`; + + html += renderTelemetryInsights(telemetry); + + // Provider breakdown + const providers = Object.keys(stats.providerBreakdown); + if (providers.length > 0) { + html += `
+`; + html += `

Translation Providers Used

+`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + + providers.forEach(provider => { + const providerStats = stats.providerBreakdown[provider]; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + }); + + html += ` +`; + html += `
ProviderChaptersCostTime
${escapeXml(provider)}${providerStats.chapters}$${providerStats.cost.toFixed(4)}${Math.round(providerStats.time)}s
+`; + html += `
\n\n`; + } + + // Model breakdown (top 10 most used) + const models = Object.entries(stats.modelBreakdown) + .sort(([,a], [,b]) => b.chapters - a.chapters) + .slice(0, 10); + + if (models.length > 0) { + html += `
+`; + html += `

AI Models Used

+`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + + models.forEach(([model, modelStats]) => { + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + }); + + html += ` +`; + html += `
ModelChaptersTokens
${escapeXml(model)}${modelStats.chapters}${modelStats.tokens.toLocaleString()}
+`; + html += `
\n\n`; + } + + // Gratitude message + html += `
+`; + html += `

Acknowledgments

+`; + html += `

${escapeXml(template.gratitudeMessage || '')}

+`; + if (template.additionalAcknowledgments) { + html += `

${escapeXml(template.additionalAcknowledgments)}

+`; + } + html += `
\n\n`; + + // Footer + if (template.customFooter) { + html += `
+`; + html += `${escapeXml(template.customFooter)} +`; + html += `
+`; + } + + html += `
+`; + html += `

Translation completed on ${new Date().toLocaleDateString()}

+`; + html += `
+`; + + return html; +}; diff --git a/services/epubService/generators/telemetryInsights.ts b/services/epubService/generators/telemetryInsights.ts new file mode 100644 index 0000000..d503f79 --- /dev/null +++ b/services/epubService/generators/telemetryInsights.ts @@ -0,0 +1,98 @@ +import type { TelemetryInsights } from '../types'; + +export const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { + if (!telemetry) return ''; + + const formatMs = (ms: number): string => { + if (!Number.isFinite(ms)) return '—'; + if (ms < 1000) return `${ms.toFixed(0)} ms`; + const seconds = ms / 1000; + if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; + const hours = minutes / 60; + return `${hours.toFixed(2)} h`; + }; + + const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { + if (!data || data.count === 0) return ''; + return ` + + ${label} + ${data.count} + ${formatMs(data.totalMs)} + ${formatMs(data.averageMs)} + `; + }; + + const rows = [ + renderRow('Navigation requests', telemetry.navigation), + renderRow('IndexedDB hydration', telemetry.hydration), + renderRow('Chapter ready-to-read', telemetry.chapterReady), + renderRow('JSON exports', telemetry.exports?.json), + renderRow('EPUB exports', telemetry.exports?.epub), + ] + .filter(Boolean) + .join(''); + + let html = `
+`; + html += `

Session Insights

+`; + html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

+`; + html += `
+`; + html += `
+`; + html += `
${telemetry.totalEvents.toLocaleString()}
+`; + html += `
Telemetry Events
+`; + html += `
+`; + html += `
+`; + html += `
${formatMs(telemetry.sessionDurationMs)}
+`; + html += `
Session Duration
+`; + html += `
+`; + html += `
+`; + + if (rows) { + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += rows; + html += ` +`; + html += `
ActivityOccurrencesTotal DurationAverage Duration
+`; + } + + html += `
+ +`; + return html; +}; + diff --git a/services/epubService/generators/titlePage.ts b/services/epubService/generators/titlePage.ts new file mode 100644 index 0000000..e3db815 --- /dev/null +++ b/services/epubService/generators/titlePage.ts @@ -0,0 +1,79 @@ +import { NovelConfig, TranslationStats } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates a professional title page using novel metadata + */ +export const generateTitlePage = (novelConfig: NovelConfig, stats: TranslationStats): string => { + let titlePageHtml = `
+`; + + // Main title + titlePageHtml += `

${escapeXml(novelConfig.title)}

+`; + + // Original title (if different) + if (novelConfig.originalTitle && novelConfig.originalTitle !== novelConfig.title) { + titlePageHtml += `
${escapeXml(novelConfig.originalTitle)}
+`; + } + + // Author + titlePageHtml += `
by ${escapeXml(novelConfig.author)}
+`; + + // Metadata section + titlePageHtml += ` +`; // metadata + titlePageHtml += `
+`; // title-page + + return titlePageHtml; +}; diff --git a/services/epubService/generators/toc.ts b/services/epubService/generators/toc.ts new file mode 100644 index 0000000..154081f --- /dev/null +++ b/services/epubService/generators/toc.ts @@ -0,0 +1,56 @@ +import { ChapterForEpub } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates a comprehensive table of contents page with navigation links + */ +export const generateTableOfContents = (chapters: ChapterForEpub[], includeStatsPage: boolean): string => { + let tocHtml = `

Table of Contents

\n\n`; + + tocHtml += `
+`; + tocHtml += `

This translation contains ${chapters.length} chapters

+`; + tocHtml += `
\n\n`; + + tocHtml += `
    +`; + + chapters.forEach((chapter, index) => { + const chapterTitle = chapter.translatedTitle || chapter.title || `Chapter ${index + 1}`; + const chapterHref = `chapter-${String(index + 1).padStart(4, '0')}.xhtml`; + + tocHtml += `
  1. +`; + tocHtml += ` ${escapeXml(chapterTitle)} +`; + tocHtml += `
    +`; + tocHtml += ` Translated with ${escapeXml(chapter.usageMetrics.provider)} ${escapeXml(chapter.usageMetrics.model)} +`; + if (chapter.images && chapter.images.length > 0) { + tocHtml += ` • ${chapter.images.length} illustration${chapter.images.length > 1 ? 's' : ''}`; + } + if (chapter.footnotes && chapter.footnotes.length > 0) { + tocHtml += ` • ${chapter.footnotes.length} footnote${chapter.footnotes.length > 1 ? 's' : ''}`; + } + tocHtml += `
    +`; + tocHtml += `
  2. +`; + }); + + // Optionally include special sections at the end + if (includeStatsPage) { + tocHtml += `
  3. +`; + tocHtml += ` Acknowledgments +`; + tocHtml += `
  4. +`; + } + tocHtml += `
+`; + + return tocHtml; +}; diff --git a/services/epubService/packagers/epubPackager.ts b/services/epubService/packagers/epubPackager.ts new file mode 100644 index 0000000..a855299 --- /dev/null +++ b/services/epubService/packagers/epubPackager.ts @@ -0,0 +1,197 @@ +import JSZip from 'jszip'; +import { EpubMeta, EpubChapter } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; +import { EPUB_STYLESHEET_CSS } from './stylesheet'; + +/** + * Generates EPUB3-compliant ZIP file using JSZip (browser-compatible) + */ +export const generateEpub3WithJSZip = async (meta: EpubMeta, chapters: EpubChapter[]): Promise => { + const lang = meta.language || 'en'; + const bookId = meta.identifier || `urn:uuid:${crypto.randomUUID()}`; + + // EPUB3 directory structure + const oebps = 'OEBPS'; + const textDir = `${oebps}/text`; + const stylesDir = `${oebps}/styles`; + const imagesDir = `${oebps}/images`; + + // Helper to wrap content in XHTML + const xhtmlWrap = (title: string, body: string) => ` + + + + + ${escapeXml(title)} + + + + ${body} + +`; + + // Generate navigation document (EPUB3 requirement) + const navXhtml = ` + + + + + Table of Contents + + + + + +`; + + // Generate manifest items for content.opf + const manifestItems = chapters.map(ch => + `` + ).join('\n '); + + // Generate spine items for content.opf + const spineItems = chapters.map(ch => + `` + ).join('\n '); + + // Content.opf (package document) + const contentOpf = ` + + + ${escapeXml(bookId)} + ${escapeXml(meta.title)} + ${lang} + ${meta.author ? `${escapeXml(meta.author)}` : ''} + ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} + ${meta.description ? `${escapeXml(meta.description)}` : ''} + ${new Date().toISOString()} + + + + + ${manifestItems} + + + ${spineItems} + +`; + + // Container.xml (required EPUB metadata) + const containerXml = ` + + + + +`; + + // Professional CSS styling (preserved from original) + const stylesheet = EPUB_STYLESHEET_CSS; + + // Extract data:image payloads from chapter XHTML and rewrite to packaged image files + type ImgEntry = { href: string; mediaType: string; base64: string; id: string }; + const processedChapters: { ch: EpubChapter; xhtml: string }[] = []; + const imageEntries: ImgEntry[] = []; + let imgIndex = 1; + const dataImgRegex = /(]*?src=")(data:(image\/[A-Za-z0-9.+-]+);base64,([A-Za-z0-9+/=]+))("[^>]*>)/g; + + for (const ch of chapters) { + let xhtml = ch.xhtml; + xhtml = xhtml.replace(dataImgRegex, (_m, p1, _src, mime, b64, p5) => { + const ext = mime.endsWith('jpeg') ? 'jpg' : (mime.split('/')[1] || 'png'); + const filename = `img-${String(imgIndex).padStart(4, '0')}.${ext}`; + const href = `images/${filename}`; + const id = `img${imgIndex}`; + imageEntries.push({ href, mediaType: mime, base64: b64, id }); + imgIndex++; + return `${p1}../${href}${p5}`; + }); + processedChapters.push({ ch, xhtml }); + } + + // Build manifest and spine including images + const manifestItemsText = processedChapters.map(({ ch }) => + `` + ).join('\n '); + const manifestItemsImages = imageEntries.map(img => + `` + ).join('\n '); + const spineItems2 = processedChapters.map(({ ch }) => ``).join('\n '); + + const contentOpf2 = ` + + + ${escapeXml(bookId)} + ${escapeXml(meta.title)} + ${lang} + ${meta.author ? `${escapeXml(meta.author)}` : ''} + ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} + ${meta.description ? `${escapeXml(meta.description)}` : ''} + ${new Date().toISOString()} + + + + + ${manifestItemsText} + ${manifestItemsImages ? `\n ${manifestItemsImages}` : ''} + + + ${spineItems2} + +`; + + // Create ZIP with JSZip + const zip = new JSZip(); + + // Add mimetype (must be first and uncompressed) + zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' }); + + // Add META-INF + zip.file('META-INF/container.xml', containerXml); + + // Add OEBPS content + zip.file(`${oebps}/content.opf`, contentOpf2); + zip.file(`${textDir}/nav.xhtml`, navXhtml); + zip.file(`${stylesDir}/stylesheet.css`, stylesheet); + + // Add processed chapter files and extracted images (with optional strict XML parse diagnostics) + const parseErrors: string[] = []; + for (const { ch, xhtml } of processedChapters) { + const wrapped = xhtmlWrap(ch.title, xhtml); + try { + const parser = new DOMParser(); + const doc = parser.parseFromString(wrapped, 'application/xhtml+xml'); + const hasError = + doc.getElementsByTagName('parsererror').length > 0 || + doc.getElementsByTagNameNS('*', 'parsererror').length > 0; + if (hasError) { + const txt = doc.documentElement.textContent || ''; + const msg = `[ParseError] ${ch.href}: ${txt.slice(0, 300)}`; + console.warn(msg); + parseErrors.push(msg); + } + } catch {} + zip.file(`${textDir}/${ch.href}`, wrapped); + } + for (const img of imageEntries) { + zip.file(`${oebps}/${img.href}`, img.base64, { base64: true }); + } + + // Attach diagnostics when parse errors are detected + if (parseErrors.length > 0) { + zip.file(`${oebps}/debug/parse-errors.txt`, parseErrors.join('\n')); + processedChapters.forEach(({ ch, xhtml }) => { + zip.file(`${oebps}/debug/text/${ch.href}.raw.xhtml`, xhtml); + }); + } + + // Generate and return ArrayBuffer + return await zip.generateAsync({ + type: 'arraybuffer', + mimeType: 'application/epub+zip' + }); +}; diff --git a/services/epubService/packagers/stylesheet.ts b/services/epubService/packagers/stylesheet.ts new file mode 100644 index 0000000..217024b --- /dev/null +++ b/services/epubService/packagers/stylesheet.ts @@ -0,0 +1,160 @@ +export const EPUB_STYLESHEET_CSS = ` +body { + font-family: Georgia, serif; + line-height: 1.6; + max-width: 42em; + margin: 0 auto; + padding: 1.5em; + color: #333; +} +h1 { + color: #2c3e50; + border-bottom: 2px solid #3498db; + padding-bottom: 0.5em; + margin-bottom: 1em; + font-weight: bold; +} +h2 { + color: #27ae60; + border-bottom: 1px solid #27ae60; + padding-bottom: 0.3em; + margin-top: 2em; + margin-bottom: 1em; +} +h3 { + color: #8e44ad; + margin-top: 1.5em; + margin-bottom: 0.75em; +} +p { + margin: 1em 0; + text-align: justify; + text-indent: 1.5em; +} +.illustration { + page-break-inside: avoid; + margin: 2em 0; + text-align: center; +} +.illustration img { + max-width: 100%; + height: auto; + border: 1px solid #ddd; + border-radius: 4px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); +} +.illustration-caption { + font-style: italic; + color: #666; + text-align: center; + font-size: 0.9em; + margin-top: 0.5em; + text-indent: 0; +} +table { + width: 100%; + border-collapse: collapse; + margin: 1em 0; + font-size: 0.9em; +} +th, td { + border: 1px solid #ddd; + padding: 0.75em; + text-align: left; +} +th { + background-color: #f8f9fa; + font-weight: bold; +} +ol, ul { + margin: 1em 0; + padding-left: 2em; +} +li { + margin-bottom: 0.5em; + line-height: 1.5; +} +.gratitude-section { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 2em; + border-radius: 12px; + margin: 3em 0; +} +.gratitude-section h2 { + color: white; + border-bottom: 1px solid rgba(255,255,255,0.3); + text-align: center; +} +.gratitude-section p { + text-indent: 0; +} +/* Footnotes styling */ +.footnotes { + margin-top: 3em; + padding-top: 2em; + border-top: 1px solid #ddd; +} +.footnotes h3 { + color: #666; + font-size: 1.1em; + margin-bottom: 1em; +} +.footnotes ol { + font-size: 0.9em; + line-height: 1.4; +} +.footnotes li { + margin-bottom: 0.75em; +} +.footnote-ref { + font-size: 0.8em; + vertical-align: super; + text-decoration: none; + color: #007bff; + font-weight: bold; +} +.footnote-backref { + margin-left: 0.5em; + font-size: 0.8em; + text-decoration: none; + color: #007bff; +} +.footnote-ref:hover, .footnote-backref:hover { + text-decoration: underline; +} +/* Title page specific styling */ +.title-page { + text-align: center; + padding: 4em 2em; + page-break-after: always; +} +.title-page h1 { + font-size: 3em; + margin-bottom: 0.5em; + color: #2c3e50; + border: none; + padding: 0; +} +.title-page .subtitle { + font-size: 1.5em; + color: #7f8c8d; + font-style: italic; + margin-bottom: 2em; +} +.title-page .author { + font-size: 1.25em; + color: #34495e; + margin-bottom: 1em; +} +.title-page .metadata { + margin-top: 3em; + font-size: 0.9em; + color: #666; + line-height: 1.6; +} +.title-page .metadata p { + text-indent: 0; + margin: 0.5em 0; +}`; + diff --git a/services/epubService/sanitizers/xhtmlSanitizer.ts b/services/epubService/sanitizers/xhtmlSanitizer.ts new file mode 100644 index 0000000..68923c3 Binary files /dev/null and b/services/epubService/sanitizers/xhtmlSanitizer.ts differ diff --git a/services/epubService/templates/defaults.ts b/services/epubService/templates/defaults.ts new file mode 100644 index 0000000..bc00acc --- /dev/null +++ b/services/epubService/templates/defaults.ts @@ -0,0 +1,30 @@ +import { EpubTemplate } from '../types'; + +/** + * Default template for EPUB metadata + * This template can be customized by users to personalize their EPUB exports + */ +export const getDefaultTemplate = (): EpubTemplate => ({ + gratitudeMessage: `This translation was made possible through the remarkable capabilities of modern AI language models. We express our deep gratitude to the teams behind these technologies who have made creative translation accessible to everyone.`, + + projectDescription: `This e-book was generated using LexiconForge, an open-source AI translation platform that enables high-quality, creative translations of literature. The platform supports multiple AI providers and allows for collaborative refinement of translations.`, + + githubUrl: 'https://github.com/anantham/LexiconForge', + + additionalAcknowledgments: `Special thanks to the original authors whose creative works inspire these translations, and to the open-source community that makes tools like this possible. Translation is an art that bridges cultures and languages, bringing stories to new audiences worldwide.`, + + customFooter: '' +}); + +/** + * Creates a customizable template - users can override any field + */ +export const createCustomTemplate = (overrides: Partial): EpubTemplate => { + const def = getDefaultTemplate(); + const merge = (a: any, b: any): any => + Object.fromEntries(Object.keys({ ...a, ...b }).map(k => { + const av = (a as any)[k], bv = (b as any)[k]; + return [k, (av && typeof av === 'object' && bv && typeof bv === 'object') ? merge(av, bv) : (bv ?? av)]; + })); + return merge(def, overrides ?? {}); +}; diff --git a/services/epubService/templates/novelConfig.ts b/services/epubService/templates/novelConfig.ts new file mode 100644 index 0000000..4059236 --- /dev/null +++ b/services/epubService/templates/novelConfig.ts @@ -0,0 +1,81 @@ +import { NovelConfig } from '../types'; + +/** + * Gets novel configuration based on URL or manual configuration + * This allows for novel-specific metadata like title, author, etc. + */ +export const getNovelConfig = (firstChapterUrl?: string, manualConfig?: Partial): NovelConfig => { + // Default configuration + const defaultConfig: NovelConfig = { + title: 'Translated Novel', + author: 'Unknown Author', + language: 'en', + originalLanguage: 'ja', + publisher: 'LexiconForge Community' + }; + + // Novel-specific configurations based on URL patterns + let novelSpecificConfig: Partial = {}; + + if (firstChapterUrl) { + if (firstChapterUrl.includes('kakuyomu.jp')) { + // Enhanced configuration based on Novel Updates data + novelSpecificConfig = { + title: 'The Reincarnation of the Strongest Exorcist in Another World', + author: 'Kosuzu Kiichi', + originalTitle: '最強陰陽師の異世界転生記 〜下僕の妖怪どもに比べてモンスターが弱すぎるんだが〜', + description: 'Haruyoshi, the strongest exorcist was on the verge of death after the betrayal of his companions. Hoping to be happy in the next life, he tried the secret technique of reincarnation and was sent to a different world! Born into a family of magicians, the magic he failed to inherit was nothing compared to his previous skills as an exorcist. "Who needs magic? I\'ll survive in this world with my old techniques!"', + genre: 'Action, Adventure, Fantasy, Harem, Romance', + originalLanguage: 'ja', + seriesName: 'The Reincarnation of the Strongest Exorcist', + volumeNumber: 1, + isbn: 'urn:uuid:strongest-exorcist-v1', + publisher: 'Futabasha (Original) / J-Novel Club (English)', + translationNotes: 'Translated from Japanese web novel published on Kakuyomu and Syosetu. Originally published in 2018 by Kosuzu Kiichi. Licensed by J-Novel Club for English publication. This is an AI-powered fan translation for educational and entertainment purposes.' + }; + } else if (firstChapterUrl.includes('booktoki468.com')) { + novelSpecificConfig = { + title: 'Dungeon Defense', + author: 'Yoo Heonhwa', + originalTitle: '던전 디펜스', + description: 'A dark fantasy novel about survival and strategy in a dungeon world where the protagonist must use cunning and manipulation to survive against overwhelming odds.', + genre: 'Dark Fantasy, Strategy, Psychological', + originalLanguage: 'ko', + seriesName: 'Dungeon Defense', + volumeNumber: 1, + isbn: 'urn:uuid:dungeon-defense-v1', + publisher: 'BookToki (Original)', + translationNotes: 'Translated from Korean web novel published on BookToki. Known for its complex psychological elements and strategic gameplay mechanics.' + }; + } else if (firstChapterUrl.includes('syosetu.com') || firstChapterUrl.includes('ncode.syosetu.com')) { + // Syosetu - Japanese web novel platform + novelSpecificConfig = { + title: 'Web Novel from Syosetu', + author: 'Unknown Syosetu Author', + originalTitle: '小説家になろう作品', + description: 'Japanese web novel from the popular Syosetu platform.', + genre: 'Web Novel, Japanese Literature', + originalLanguage: 'ja', + publisher: 'Syosetu (Original)', + translationNotes: 'Translated from Japanese web novel published on Syosetu (Shōsetsuka ni Narō).' + }; + } else if (firstChapterUrl.includes('novelupdates.com')) { + // Novel Updates - aggregator site + novelSpecificConfig = { + title: 'Novel from Novel Updates', + author: 'Unknown Author', + description: 'Novel sourced from Novel Updates database.', + genre: 'Various', + publisher: 'Novel Updates Community', + translationNotes: 'Novel information sourced from Novel Updates community database.' + }; + } + // Add more novel configurations as needed + } + + return { + ...defaultConfig, + ...novelSpecificConfig, + ...manualConfig + }; +}; diff --git a/services/epubService/types.ts b/services/epubService/types.ts new file mode 100644 index 0000000..6553092 --- /dev/null +++ b/services/epubService/types.ts @@ -0,0 +1,116 @@ +import { AppSettings } from '../../types'; + +export interface ChapterForEpub { + title: string; + originalTitle?: string; + content: string; + originalUrl: string; + url?: string; + translatedTitle: string; + translatedContent?: string; + prevUrl?: string | null; + nextUrl?: string | null; + usageMetrics: { + totalTokens: number; + promptTokens: number; + completionTokens: number; + estimatedCost: number; + requestTime: number; + provider: string; + model: string; + }; + images: Array<{ + marker: string; + imageData: string; // base64 data URL + prompt: string; + }>; + footnotes?: Array<{ + marker: string; + text: string; + }>; +} + +export interface TranslationStats { + totalCost: number; + totalTime: number; + totalTokens: number; + chapterCount: number; + imageCount: number; + providerBreakdown: Record; + modelBreakdown: Record; +} + +export interface TelemetryInsights { + totalEvents: number; + sessionDurationMs: number; + navigation: { count: number; totalMs: number; averageMs: number }; + hydration: { count: number; totalMs: number; averageMs: number }; + chapterReady: { count: number; totalMs: number; averageMs: number }; + exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; +} + +export interface NovelConfig { + title: string; + author: string; + originalTitle?: string; + description?: string; + genre?: string; + language: string; + originalLanguage?: string; + coverImage?: string; // base64 or URL + seriesName?: string; + volumeNumber?: number; + isbn?: string; + publisher?: string; + translationNotes?: string; +} + +export interface EpubTemplate { + gratitudeMessage?: string; + projectDescription?: string; + githubUrl?: string; + additionalAcknowledgments?: string; + customFooter?: string; +} + +export interface EpubExportOptions { + title?: string; + author?: string; + description?: string; + chapters: ChapterForEpub[]; + settings: AppSettings; + template?: EpubTemplate; + novelConfig?: NovelConfig; + telemetryInsights?: TelemetryInsights; + includeTitlePage?: boolean; + includeStatsPage?: boolean; + customTemplate?: any; + manualConfig?: any; + chapterUrls?: string[]; +} + +export interface EpubChapter { + id: string; + title: string; + xhtml: string; + href: string; +} + +export interface EpubMeta { + title: string; + author: string; + description?: string; + language?: string; + identifier?: string; + publisher?: string; +} \ No newline at end of file