From d96a6312adecb5cd5a344bdebe424ef524e7d5dc Mon Sep 17 00:00:00 2001 From: Aditya A P Date: Mon, 22 Dec 2025 09:05:11 +0530 Subject: [PATCH 1/3] refactor(epub): decompose epubService into modular components - **Sanitizers**: Extracted XML/XHTML logic to `services/epub/sanitizers/` - **Generators**: Moved HTML generation to `services/epub/generators/` - **Data**: Moved collection and stats logic to `services/epub/data/` - **Packaging**: Moved JSZip logic to `services/epub/packagers/` - **Orchestrator**: `epubService.ts` is now a thin facade (~120 LOC) - **Tests**: Verified with existing suite (16 tests passed) --- services/epub/data/collector.ts | 128 ++ services/epub/data/stats.ts | 58 + services/epub/generators/chapter.ts | 212 +++ services/epub/generators/statsPage.ts | 304 ++++ services/epub/generators/titlePage.ts | 79 + services/epub/generators/toc.ts | 56 + services/epub/packagers/epubPackager.ts | 354 ++++ services/epub/sanitizers/xhtmlSanitizer.ts | Bin 0 -> 9257 bytes services/epub/templates/defaults.ts | 30 + services/epub/templates/novelConfig.ts | 81 + services/epub/types.ts | 334 +--- services/epubService.ts | 1718 +------------------- 12 files changed, 1447 insertions(+), 1907 deletions(-) create mode 100644 services/epub/data/collector.ts create mode 100644 services/epub/data/stats.ts create mode 100644 services/epub/generators/chapter.ts create mode 100644 services/epub/generators/statsPage.ts create mode 100644 services/epub/generators/titlePage.ts create mode 100644 services/epub/generators/toc.ts create mode 100644 services/epub/packagers/epubPackager.ts create mode 100644 services/epub/sanitizers/xhtmlSanitizer.ts create mode 100644 services/epub/templates/defaults.ts create mode 100644 services/epub/templates/novelConfig.ts diff --git a/services/epub/data/collector.ts b/services/epub/data/collector.ts new file mode 100644 index 0000000..7a27e09 --- /dev/null +++ b/services/epub/data/collector.ts @@ -0,0 +1,128 @@ +import { SessionChapterData } from '../../../types'; +import { ChapterForEpub } from '../types'; + +/** + * Creates a ChapterForEpub object from session data + */ +export const createChapterForEpub = (data: any, url: string): ChapterForEpub => { + // Create default metrics for chapters missing usage data + let metrics = data.translationResult.usageMetrics; + + if (!metrics) { + console.warn(`[EPUBService] Chapter ${url} missing usageMetrics - using defaults for statistics`); + metrics = { + totalTokens: 0, + promptTokens: 0, + completionTokens: 0, + estimatedCost: 0, + requestTime: 0, + provider: 'Unknown', + model: 'Unknown' + }; + } else { + // Validate and fix invalid metrics values + const fixedMetrics = { + totalTokens: typeof metrics.totalTokens === 'number' && isFinite(metrics.totalTokens) ? metrics.totalTokens : 0, + promptTokens: typeof metrics.promptTokens === 'number' && isFinite(metrics.promptTokens) ? metrics.promptTokens : 0, + completionTokens: typeof metrics.completionTokens === 'number' && isFinite(metrics.completionTokens) ? metrics.completionTokens : 0, + estimatedCost: typeof metrics.estimatedCost === 'number' && isFinite(metrics.estimatedCost) ? metrics.estimatedCost : 0, + requestTime: typeof metrics.requestTime === 'number' && isFinite(metrics.requestTime) ? metrics.requestTime : 0, + provider: typeof metrics.provider === 'string' ? metrics.provider : 'Unknown', + model: typeof metrics.model === 'string' ? metrics.model : 'Unknown' + }; + + // Check if we had to fix any values + const hadInvalidData = Object.keys(metrics).some(key => + metrics[key] !== fixedMetrics[key] + ); + + if (hadInvalidData) { + console.warn(`[EPUBService] Chapter ${url} had invalid usageMetrics - fixed for statistics:`, { + original: metrics, + fixed: fixedMetrics + }); + } + + metrics = fixedMetrics; + } + + // Get images from translation result + const images = data.translationResult.suggestedIllustrations?.map((illust: any) => ({ + marker: illust.placementMarker, + imageData: illust.url || '', // This should be base64 data from generation + prompt: illust.imagePrompt + })) || []; + + // Get footnotes from translation result + const footnotes = data.translationResult.footnotes?.map((footnote: any) => ({ + marker: footnote.marker, + text: footnote.text + })) || []; + + const translatedContent = data.translationResult.translation || ''; + + return { + title: data.chapter.title, + originalTitle: data.chapter.originalTitle || data.chapter.title, + content: data.chapter.content, + originalUrl: url, + url, + translatedTitle: data.translationResult.translatedTitle, + translatedContent, + prevUrl: data.chapter.prevUrl ?? null, + nextUrl: data.chapter.nextUrl ?? null, + usageMetrics: { + totalTokens: metrics.totalTokens, + promptTokens: metrics.promptTokens, + completionTokens: metrics.completionTokens, + estimatedCost: metrics.estimatedCost, + requestTime: metrics.requestTime, + provider: metrics.provider, + model: metrics.model, + }, + images: images.filter((img: any) => img.imageData), // Only include images with data + footnotes: footnotes + }; +}; + +/** + * Collects active version chapters from session data for EPUB export + * Uses activeVersion tracking to determine which translation to include + */ +export const collectActiveVersions = ( + sessionData: Record, + urlHistory: string[] +): ChapterForEpub[] => { + const chapters: ChapterForEpub[] = []; + + // Use urlHistory for ordering, but also include any chapters not in history + // First, process chapters in urlHistory order to maintain chronological sequence + const processedUrls = new Set(); + + // Add chapters from urlHistory first (in order) + for (const url of urlHistory) { + if (sessionData[url]?.chapter && sessionData[url]?.translationResult) { + processedUrls.add(url); + const data = sessionData[url]; + chapters.push(createChapterForEpub(data, url)); + } + } + + // Then add any remaining chapters not in urlHistory (sorted by URL for consistency) + const remainingUrls = Object.keys(sessionData) + .filter(url => !processedUrls.has(url)) + .sort(); + + for (const url of remainingUrls) { + const data = sessionData[url]; + if (!data?.chapter || !data?.translationResult) { + console.log(`[EPUBService] Skipping ${url} - missing chapter or translation result`); + continue; + } + + chapters.push(createChapterForEpub(data, url)); + } + + console.log(`[EPUBService] Prepared ${chapters.length} chapters for EPUB in chronological order`); + return chapters; +}; diff --git a/services/epub/data/stats.ts b/services/epub/data/stats.ts new file mode 100644 index 0000000..b24b5c5 --- /dev/null +++ b/services/epub/data/stats.ts @@ -0,0 +1,58 @@ +import { ChapterForEpub, TranslationStats } from '../types'; + +/** + * Calculates comprehensive statistics from collected chapters + */ +export const calculateTranslationStats = (chapters: ChapterForEpub[]): TranslationStats => { + const stats: TranslationStats = { + totalCost: 0, + totalTime: 0, + totalTokens: 0, + chapterCount: chapters.length, + imageCount: 0, + providerBreakdown: {}, + modelBreakdown: {} + }; + + chapters.forEach(chapter => { + const metrics = chapter.usageMetrics; + + // Aggregate totals + stats.totalCost += metrics.estimatedCost; + stats.totalTime += metrics.requestTime; + stats.totalTokens += metrics.totalTokens; + stats.imageCount += chapter.images.length; + + // Provider breakdown + if (!stats.providerBreakdown[metrics.provider]) { + stats.providerBreakdown[metrics.provider] = { + chapters: 0, + cost: 0, + time: 0, + tokens: 0 + }; + } + const providerStats = stats.providerBreakdown[metrics.provider]; + providerStats.chapters += 1; + providerStats.cost += metrics.estimatedCost; + providerStats.time += metrics.requestTime; + providerStats.tokens += metrics.totalTokens; + + // Model breakdown + if (!stats.modelBreakdown[metrics.model]) { + stats.modelBreakdown[metrics.model] = { + chapters: 0, + cost: 0, + time: 0, + tokens: 0 + }; + } + const modelStats = stats.modelBreakdown[metrics.model]; + modelStats.chapters += 1; + modelStats.cost += metrics.estimatedCost; + modelStats.time += metrics.requestTime; + modelStats.tokens += metrics.totalTokens; + }); + + return stats; +}; diff --git a/services/epub/generators/chapter.ts b/services/epub/generators/chapter.ts new file mode 100644 index 0000000..13abc4d --- /dev/null +++ b/services/epub/generators/chapter.ts @@ -0,0 +1,212 @@ +import { ChapterForEpub } from '../types'; +import { + sanitizeHtmlAllowlist, + toStrictXhtml, + convertNewlinesToBrInElement, + htmlFragmentToXhtml, + escapeXml +} from '../sanitizers/xhtmlSanitizer'; + +/** + * Converts chapter content with illustrations and footnotes to XHTML suitable for EPUB + */ +export const convertChapterToHtml = (chapter: ChapterForEpub): string => { + let htmlContent = chapter.translatedTitle ? + `

${escapeXml(chapter.translatedTitle)}

\n\n` : + `

${escapeXml(chapter.title)}

\n\n`; + + // Get the translated content, fallback to original if needed + let content = chapter.content; + + // Process content and embed images + if (chapter.images.length > 0) { + // Replace illustration markers with actual images + for (const image of chapter.images) { + const imgHtml = `
+ ${escapeXml(image.prompt)} +

${escapeXml(image.prompt)}

+
`; + + content = content.replace(image.marker, imgHtml); + } + } + + // Process and embed footnotes + if (chapter.footnotes && chapter.footnotes.length > 0) { + // Replace footnote markers with links + for (const footnote of chapter.footnotes) { + const footnoteLink = `[${footnote.marker}]`; + content = content.replace(`[${footnote.marker}]`, footnoteLink); + } + + // Add footnotes section at the end + let footnotesHtml = `
+

Footnotes

+
    +`; + for (const footnote of chapter.footnotes) { + footnotesHtml += `
  1. +`; + footnotesHtml += ` ${escapeXml(footnote.text)} +`; + footnotesHtml += ` +`; + footnotesHtml += `
  2. \n`; + } + footnotesHtml += `
+
+`; + content += '\n' + footnotesHtml; + } + + // Convert content to proper XHTML paragraphs + content = convertToXhtmlParagraphs(content); + + htmlContent += content; + + return htmlContent; +}; + +/** + * Converts text content to proper XHTML paragraphs without invalid nesting + */ +export const convertToXhtmlParagraphs = (content: string): string => { + // First, escape any remaining unescaped XML entities + content = content.replace(/&(?!(amp|lt|gt|quot|apos);)/g, '&'); + + // Split content by double newlines to create paragraphs + const paragraphs = content.split(/\n\s*\n/); + + let xhtmlContent = ''; + + for (let para of paragraphs) { + para = para.trim(); + if (!para) continue; + + // Check if this paragraph already contains block-level HTML elements + const hasBlockElements = /<(div|p|h[1-6]|ul|ol|li|blockquote|pre|hr|table|form|fieldset|address|center)[^>]*>/i.test(para); + + if (hasBlockElements) { + // Already has block elements, just add it as-is but fix line breaks + para = para.replace(/\n/g, ' '); // Convert single line breaks to spaces within block elements + xhtmlContent += para + '\n\n'; + } else { + // Regular text paragraph - wrap in

and convert line breaks to
+ para = para.replace(/\n/g, '
'); // Use self-closing br tags for XHTML + xhtmlContent += `

${para}

\n\n`; + } + } + + return xhtmlContent.trim(); +}; + +/** + * Build chapter XHTML using DOM nodes (footnotes visible inline and at end) + */ +export const buildChapterXhtml = (chapter: ChapterForEpub): string => { + const root = document.createElement('div'); + // Title + const h1 = document.createElement('h1'); + h1.textContent = chapter.translatedTitle || chapter.title; + root.appendChild(h1); + + // 1) Inject placeholders for markers + const withIllu = chapter.content.replace(/\b(ILLUSTRATION-\d+[A-Za-z]*)\b/g, (_m, marker) => { + return ``; + }); + const withPlaceholders = withIllu.replace(/\((\d+)\)/g, (_m, n) => ``); + + // 2) Sanitize with tight allowlist to preserve inline tags safely + const sanitized = sanitizeHtmlAllowlist(withPlaceholders); + + // 3) Materialize into a working container and normalize newlines to
+ const container = document.createElement('div'); + container.innerHTML = sanitized; + convertNewlinesToBrInElement(container); + + // 4) Replace placeholders with generated illustration blocks and footnote refs + const imagesByMarker = new Map( + chapter.images.map(i => [i.marker, i]) + ); + for (const span of Array.from(container.querySelectorAll('span[data-illu]'))) { + const marker = (span as HTMLElement).getAttribute('data-illu') || ''; + const img = imagesByMarker.get(`[${marker}]`) || imagesByMarker.get(marker); + if (img) { + const wrap = document.createElement('div'); + wrap.setAttribute('class', 'illustration'); + const im = document.createElement('img'); + im.setAttribute('src', img.imageData); + im.setAttribute('alt', img.prompt); + im.setAttribute('style', 'max-width: 100%; height: auto; display: block; margin: 1em auto;'); + const cap = document.createElement('p'); + cap.setAttribute('class', 'illustration-caption'); + cap.setAttribute('style', 'text-align: center; font-style: italic; color: #666; font-size: 0.9em; margin-top: 0.5em;'); + cap.textContent = img.prompt; + wrap.appendChild(im); + wrap.appendChild(cap); + span.replaceWith(wrap); + } else { + // If missing, remove placeholder + span.remove(); + } + } + for (const span of Array.from(container.querySelectorAll('span[data-fn]'))) { + const num = (span as HTMLElement).getAttribute('data-fn') || ''; + const sup = document.createElement('sup'); + const a = document.createElement('a'); + a.setAttribute('href', `#fn${num}`); + a.setAttribute('class', 'footnote-ref'); + a.setAttribute('id', `fnref${num}`); + a.setAttribute('epub:type', 'noteref'); + a.textContent = `[${num}]`; + sup.appendChild(a); + span.replaceWith(sup); + } + + // 5) Append sanitized content under title + while (container.firstChild) root.appendChild(container.firstChild); + + // 6) Footnotes section at end + if (chapter.footnotes && chapter.footnotes.length > 0) { + const div = document.createElement('div'); + div.setAttribute('class', 'footnotes'); + const h3 = document.createElement('h3'); + h3.textContent = 'Footnotes'; + const ol = document.createElement('ol'); + div.appendChild(h3); + div.appendChild(ol); + for (const fn of chapter.footnotes) { + const num = String(fn.marker).replace(/^\ \[|\ \]$/g, ''); + const li = document.createElement('li'); + li.setAttribute('id', `fn${num}`); + li.setAttribute('epub:type', 'footnote'); + + // Allow limited inline HTML inside footnotes (e.g., , ,
) + try { + const safeHtml = sanitizeHtmlAllowlist(fn.text || ''); + if (safeHtml) { + const temp = document.createElement('div'); + temp.innerHTML = safeHtml; + while (temp.firstChild) li.appendChild(temp.firstChild); + li.appendChild(document.createTextNode(' ')); + } else { + li.appendChild(document.createTextNode((fn.text || '') + ' ')); + } + } catch { + li.appendChild(document.createTextNode((fn.text || '') + ' ')); + } + + const back = document.createElement('a'); + back.setAttribute('href', `#fnref${num}`); + back.setAttribute('class', 'footnote-backref'); + back.setAttribute('epub:type', 'backlink'); + back.textContent = '↩'; + li.appendChild(back); + ol.appendChild(li); + } + root.appendChild(div); + } + + // 7) XHTML serialization + return htmlFragmentToXhtml(toStrictXhtml(root.innerHTML)); +}; diff --git a/services/epub/generators/statsPage.ts b/services/epub/generators/statsPage.ts new file mode 100644 index 0000000..548cefd --- /dev/null +++ b/services/epub/generators/statsPage.ts @@ -0,0 +1,304 @@ +import { TranslationStats, EpubTemplate, TelemetryInsights } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates a detailed statistics and acknowledgments page + */ +export const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { + if (!telemetry) return ''; + + const formatMs = (ms: number): string => { + if (!Number.isFinite(ms)) return '—'; + if (ms < 1000) return `${ms.toFixed(0)} ms`; + const seconds = ms / 1000; + if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; + const hours = minutes / 60; + return `${hours.toFixed(2)} h`; + }; + + const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { + if (!data || data.count === 0) return ''; + return ` + + ${label} + ${data.count} + ${formatMs(data.totalMs)} + ${formatMs(data.averageMs)} + `; + }; + + const rows = [ + renderRow('Navigation requests', telemetry.navigation), + renderRow('IndexedDB hydration', telemetry.hydration), + renderRow('Chapter ready-to-read', telemetry.chapterReady), + renderRow('JSON exports', telemetry.exports?.json), + renderRow('EPUB exports', telemetry.exports?.epub) + ].filter(Boolean).join(''); + + let html = `
+`; + html += `

Session Insights

+`; + html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

+`; + html += `
+`; + html += `
+`; + html += `
${telemetry.totalEvents.toLocaleString()}
+`; + html += `
Telemetry Events
+`; + html += `
+`; + html += `
+`; + html += `
${formatMs(telemetry.sessionDurationMs)}
+`; + html += `
Session Duration
+`; + html += `
+`; + html += `
+`; + + if (rows) { + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += rows; + html += ` +`; + html += `
ActivityOccurrencesTotal DurationAverage Duration
+`; + } + + html += `
+ +`; + return html; +}; + +export const generateStatsAndAcknowledgments = (stats: TranslationStats, template: EpubTemplate, telemetry?: TelemetryInsights): string => { + let html = `

Acknowledgments

\n\n`; + + // Project description + html += `
+`; + html += `

About This Translation

+`; + html += `

${escapeXml(template.projectDescription || '')}

+`; + if (template.githubUrl) { + html += `

Source Code: ${escapeXml(template.githubUrl)}

+`; + } + html += `
\n\n`; + + // Translation statistics + html += `
+`; + html += `

Translation Statistics

+`; + + html += `
+`; + html += `
+`; + html += `
${stats.chapterCount}
+`; + html += `
Chapters
+`; + html += `
+`; + html += `
+`; + html += `
$${stats.totalCost.toFixed(4)}
+`; + html += `
Total Cost
+`; + html += `
+`; + html += `
+`; + html += `
${Math.round(stats.totalTime)}s
+`; + html += `
Total Time
+`; + html += `
+`; + html += `
+`; + html += `
${stats.totalTokens.toLocaleString()}
+`; + html += `
Total Tokens
+`; + html += `
+`; + if (stats.imageCount > 0) { + html += `
+`; + html += `
${stats.imageCount}
+`; + html += `
Images Generated
+`; + html += `
+`; + } + html += `
+`; + html += `
\n\n`; + + html += renderTelemetryInsights(telemetry); + + // Provider breakdown + const providers = Object.keys(stats.providerBreakdown); + if (providers.length > 0) { + html += `
+`; + html += `

Translation Providers Used

+`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + + providers.forEach(provider => { + const providerStats = stats.providerBreakdown[provider]; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + }); + + html += ` +`; + html += `
ProviderChaptersCostTime
${escapeXml(provider)}${providerStats.chapters}$${providerStats.cost.toFixed(4)}${Math.round(providerStats.time)}s
+`; + html += `
\n\n`; + } + + // Model breakdown (top 10 most used) + const models = Object.entries(stats.modelBreakdown) + .sort(([,a], [,b]) => b.chapters - a.chapters) + .slice(0, 10); + + if (models.length > 0) { + html += `
+`; + html += `

AI Models Used

+`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + + models.forEach(([model, modelStats]) => { + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + }); + + html += ` +`; + html += `
ModelChaptersTokens
${escapeXml(model)}${modelStats.chapters}${modelStats.tokens.toLocaleString()}
+`; + html += `
\n\n`; + } + + // Gratitude message + html += `
+`; + html += `

Acknowledgments

+`; + html += `

${escapeXml(template.gratitudeMessage || '')}

+`; + if (template.additionalAcknowledgments) { + html += `

${escapeXml(template.additionalAcknowledgments)}

+`; + } + html += `
\n\n`; + + // Footer + if (template.customFooter) { + html += `
+`; + html += `${escapeXml(template.customFooter)} +`; + html += `
+`; + } + + html += `
+`; + html += `

Translation completed on ${new Date().toLocaleDateString()}

+`; + html += `
+`; + + return html; +}; diff --git a/services/epub/generators/titlePage.ts b/services/epub/generators/titlePage.ts new file mode 100644 index 0000000..e3db815 --- /dev/null +++ b/services/epub/generators/titlePage.ts @@ -0,0 +1,79 @@ +import { NovelConfig, TranslationStats } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates a professional title page using novel metadata + */ +export const generateTitlePage = (novelConfig: NovelConfig, stats: TranslationStats): string => { + let titlePageHtml = `
+`; + + // Main title + titlePageHtml += `

${escapeXml(novelConfig.title)}

+`; + + // Original title (if different) + if (novelConfig.originalTitle && novelConfig.originalTitle !== novelConfig.title) { + titlePageHtml += `
${escapeXml(novelConfig.originalTitle)}
+`; + } + + // Author + titlePageHtml += `
by ${escapeXml(novelConfig.author)}
+`; + + // Metadata section + titlePageHtml += ` +`; // metadata + titlePageHtml += `
+`; // title-page + + return titlePageHtml; +}; diff --git a/services/epub/generators/toc.ts b/services/epub/generators/toc.ts new file mode 100644 index 0000000..154081f --- /dev/null +++ b/services/epub/generators/toc.ts @@ -0,0 +1,56 @@ +import { ChapterForEpub } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates a comprehensive table of contents page with navigation links + */ +export const generateTableOfContents = (chapters: ChapterForEpub[], includeStatsPage: boolean): string => { + let tocHtml = `

Table of Contents

\n\n`; + + tocHtml += `
+`; + tocHtml += `

This translation contains ${chapters.length} chapters

+`; + tocHtml += `
\n\n`; + + tocHtml += `
    +`; + + chapters.forEach((chapter, index) => { + const chapterTitle = chapter.translatedTitle || chapter.title || `Chapter ${index + 1}`; + const chapterHref = `chapter-${String(index + 1).padStart(4, '0')}.xhtml`; + + tocHtml += `
  1. +`; + tocHtml += ` ${escapeXml(chapterTitle)} +`; + tocHtml += `
    +`; + tocHtml += ` Translated with ${escapeXml(chapter.usageMetrics.provider)} ${escapeXml(chapter.usageMetrics.model)} +`; + if (chapter.images && chapter.images.length > 0) { + tocHtml += ` • ${chapter.images.length} illustration${chapter.images.length > 1 ? 's' : ''}`; + } + if (chapter.footnotes && chapter.footnotes.length > 0) { + tocHtml += ` • ${chapter.footnotes.length} footnote${chapter.footnotes.length > 1 ? 's' : ''}`; + } + tocHtml += `
    +`; + tocHtml += `
  2. +`; + }); + + // Optionally include special sections at the end + if (includeStatsPage) { + tocHtml += `
  3. +`; + tocHtml += ` Acknowledgments +`; + tocHtml += `
  4. +`; + } + tocHtml += `
+`; + + return tocHtml; +}; diff --git a/services/epub/packagers/epubPackager.ts b/services/epub/packagers/epubPackager.ts new file mode 100644 index 0000000..aafe6ec --- /dev/null +++ b/services/epub/packagers/epubPackager.ts @@ -0,0 +1,354 @@ +import JSZip from 'jszip'; +import { EpubMeta, EpubChapter } from '../types'; +import { escapeXml } from '../sanitizers/xhtmlSanitizer'; + +/** + * Generates EPUB3-compliant ZIP file using JSZip (browser-compatible) + */ +export const generateEpub3WithJSZip = async (meta: EpubMeta, chapters: EpubChapter[]): Promise => { + const lang = meta.language || 'en'; + const bookId = meta.identifier || `urn:uuid:${crypto.randomUUID()}`; + + // EPUB3 directory structure + const oebps = 'OEBPS'; + const textDir = `${oebps}/text`; + const stylesDir = `${oebps}/styles`; + const imagesDir = `${oebps}/images`; + + // Helper to wrap content in XHTML + const xhtmlWrap = (title: string, body: string) => ` + + + + + ${escapeXml(title)} + + + + ${body} + +`; + + // Generate navigation document (EPUB3 requirement) + const navXhtml = ` + + + + + Table of Contents + + + + + +`; + + // Generate manifest items for content.opf + const manifestItems = chapters.map(ch => + `` + ).join('\n '); + + // Generate spine items for content.opf + const spineItems = chapters.map(ch => + `` + ).join('\n '); + + // Content.opf (package document) + const contentOpf = ` + + + ${escapeXml(bookId)} + ${escapeXml(meta.title)} + ${lang} + ${meta.author ? `${escapeXml(meta.author)}` : ''} + ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} + ${meta.description ? `${escapeXml(meta.description)}` : ''} + ${new Date().toISOString()} + + + + + ${manifestItems} + + + ${spineItems} + +`; + + // Container.xml (required EPUB metadata) + const containerXml = ` + + + + +`; + + // Professional CSS styling (preserved from original) + const stylesheet = ` +body { + font-family: Georgia, serif; + line-height: 1.6; + max-width: 42em; + margin: 0 auto; + padding: 1.5em; + color: #333; +} +h1 { + color: #2c3e50; + border-bottom: 2px solid #3498db; + padding-bottom: 0.5em; + margin-bottom: 1em; + font-weight: bold; +} +h2 { + color: #27ae60; + border-bottom: 1px solid #27ae60; + padding-bottom: 0.3em; + margin-top: 2em; + margin-bottom: 1em; +} +h3 { + color: #8e44ad; + margin-top: 1.5em; + margin-bottom: 0.75em; +} +p { + margin: 1em 0; + text-align: justify; + text-indent: 1.5em; +} +.illustration { + page-break-inside: avoid; + margin: 2em 0; + text-align: center; +} +.illustration img { + max-width: 100%; + height: auto; + border: 1px solid #ddd; + border-radius: 4px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); +} +.illustration-caption { + font-style: italic; + color: #666; + text-align: center; + font-size: 0.9em; + margin-top: 0.5em; + text-indent: 0; +} +table { + width: 100%; + border-collapse: collapse; + margin: 1em 0; + font-size: 0.9em; +} +th, td { + border: 1px solid #ddd; + padding: 0.75em; + text-align: left; +} +th { + background-color: #f8f9fa; + font-weight: bold; +} +ol, ul { + margin: 1em 0; + padding-left: 2em; +} +li { + margin-bottom: 0.5em; + line-height: 1.5; +} +.gratitude-section { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 2em; + border-radius: 12px; + margin: 3em 0; +} +.gratitude-section h2 { + color: white; + border-bottom: 1px solid rgba(255,255,255,0.3); + text-align: center; +} +.gratitude-section p { + text-indent: 0; +} +/* Footnotes styling */ +.footnotes { + margin-top: 3em; + padding-top: 2em; + border-top: 1px solid #ddd; +} +.footnotes h3 { + color: #666; + font-size: 1.1em; + margin-bottom: 1em; +} +.footnotes ol { + font-size: 0.9em; + line-height: 1.4; +} +.footnotes li { + margin-bottom: 0.75em; +} +.footnote-ref { + font-size: 0.8em; + vertical-align: super; + text-decoration: none; + color: #007bff; + font-weight: bold; +} +.footnote-backref { + margin-left: 0.5em; + font-size: 0.8em; + text-decoration: none; + color: #007bff; +} +.footnote-ref:hover, .footnote-backref:hover { + text-decoration: underline; +} +/* Title page specific styling */ +.title-page { + text-align: center; + padding: 4em 2em; + page-break-after: always; +} +.title-page h1 { + font-size: 3em; + margin-bottom: 0.5em; + color: #2c3e50; + border: none; + padding: 0; +} +.title-page .subtitle { + font-size: 1.5em; + color: #7f8c8d; + font-style: italic; + margin-bottom: 2em; +} +.title-page .author { + font-size: 1.25em; + color: #34495e; + margin-bottom: 1em; +} +.title-page .metadata { + margin-top: 3em; + font-size: 0.9em; + color: #666; + line-height: 1.6; +} +.title-page .metadata p { + text-indent: 0; + margin: 0.5em 0; +}`; + + // Extract data:image payloads from chapter XHTML and rewrite to packaged image files + type ImgEntry = { href: string; mediaType: string; base64: string; id: string }; + const processedChapters: { ch: EpubChapter; xhtml: string }[] = []; + const imageEntries: ImgEntry[] = []; + let imgIndex = 1; + const dataImgRegex = /(]*?src=")(data:(image\/[A-Za-z0-9.+-]+);base64,([A-Za-z0-9+/=]+))("[^>]*>)/g; + + for (const ch of chapters) { + let xhtml = ch.xhtml; + xhtml = xhtml.replace(dataImgRegex, (_m, p1, _src, mime, b64, p5) => { + const ext = mime.endsWith('jpeg') ? 'jpg' : (mime.split('/')[1] || 'png'); + const filename = `img-${String(imgIndex).padStart(4, '0')}.${ext}`; + const href = `images/${filename}`; + const id = `img${imgIndex}`; + imageEntries.push({ href, mediaType: mime, base64: b64, id }); + imgIndex++; + return `${p1}../${href}${p5}`; + }); + processedChapters.push({ ch, xhtml }); + } + + // Build manifest and spine including images + const manifestItemsText = processedChapters.map(({ ch }) => + `` + ).join('\n '); + const manifestItemsImages = imageEntries.map(img => + `` + ).join('\n '); + const spineItems2 = processedChapters.map(({ ch }) => ``).join('\n '); + + const contentOpf2 = ` + + + ${escapeXml(bookId)} + ${escapeXml(meta.title)} + ${lang} + ${meta.author ? `${escapeXml(meta.author)}` : ''} + ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} + ${meta.description ? `${escapeXml(meta.description)}` : ''} + ${new Date().toISOString()} + + + + + ${manifestItemsText} + ${manifestItemsImages ? `\n ${manifestItemsImages}` : ''} + + + ${spineItems2} + +`; + + // Create ZIP with JSZip + const zip = new JSZip(); + + // Add mimetype (must be first and uncompressed) + zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' }); + + // Add META-INF + zip.file('META-INF/container.xml', containerXml); + + // Add OEBPS content + zip.file(`${oebps}/content.opf`, contentOpf2); + zip.file(`${textDir}/nav.xhtml`, navXhtml); + zip.file(`${stylesDir}/stylesheet.css`, stylesheet); + + // Add processed chapter files and extracted images (with optional strict XML parse diagnostics) + const parseErrors: string[] = []; + for (const { ch, xhtml } of processedChapters) { + const wrapped = xhtmlWrap(ch.title, xhtml); + try { + const parser = new DOMParser(); + const doc = parser.parseFromString(wrapped, 'application/xhtml+xml'); + const hasError = + doc.getElementsByTagName('parsererror').length > 0 || + doc.getElementsByTagNameNS('*', 'parsererror').length > 0; + if (hasError) { + const txt = doc.documentElement.textContent || ''; + const msg = `[ParseError] ${ch.href}: ${txt.slice(0, 300)}`; + console.warn(msg); + parseErrors.push(msg); + } + } catch {} + zip.file(`${textDir}/${ch.href}`, wrapped); + } + for (const img of imageEntries) { + zip.file(`${oebps}/${img.href}`, img.base64, { base64: true }); + } + + // Attach diagnostics when parse errors are detected + if (parseErrors.length > 0) { + zip.file(`${oebps}/debug/parse-errors.txt`, parseErrors.join('\n')); + processedChapters.forEach(({ ch, xhtml }) => { + zip.file(`${oebps}/debug/text/${ch.href}.raw.xhtml`, xhtml); + }); + } + + // Generate and return ArrayBuffer + return await zip.generateAsync({ + type: 'arraybuffer', + mimeType: 'application/epub+zip' + }); +}; diff --git a/services/epub/sanitizers/xhtmlSanitizer.ts b/services/epub/sanitizers/xhtmlSanitizer.ts new file mode 100644 index 0000000000000000000000000000000000000000..68923c321a8d395a90c52bfe24dd61712bc0940b GIT binary patch literal 9257 zcmcIq{c_vJ5%1rAiLTR(2|5(0B>ggF%OlJ5*b`aKL`tHmtU4fYBoToCiUUM3j;DE$ z_T};<{q5czaQu)nooPKbCF1V(c7OZ5aJndqs;2i;7iV>q#`VR#US#xvrd6?^UKkGX zx2~dGXHl(&FR^$Qp=nW(7OKR{ zT2*P3r9VY=TIBcCt=t?JxvmY3cc*9cgnIM3E=R-Ra=8qbUxr0B8-D)i(WBulGlnbI z&%Hk3Q?&aWF6Fa-9kz^Zo*#ex?K2^;es7wTQ@Qzz{r$fTi&Af%yEu7u`qgjno2B`+ zh3G6@lvz4Wfe8a~3Y_Q$P$U8c1-h9|(>PUmEtm)-cRqY>@WL!kHUAOy{m_x4vmhUMLLaX0yWA7O|GM<>Tb}E_O!`k)()lmS(N80 zIRX;_!#5I5=CeM%zeiN5x~XzuEYx*W)%yFio(H`m@Ac{DpJjobJOQe@ijyJ+4j;r= zzEjnTvUE1Dmx@2>`0R|dv#0~QR;r{(Q$#w=XPL+27K`rGS-r|s063csCXH#`03owV zPoGk+*AFXIW-!uVc=@}%-~aP!I2-We1BjJQDHt|Y_EtZ98{p%|=qA!}m6r8rn1;2| zb#T+~JL!9n%ASEyR!VCiCr9txY;n5@q1r`&JAsNd=oWgdYDWF^EKu2q#+h2cE(QXr zQ^J6)XH#5eo;77~zg=XbEXrrl$9qy)XsL=pfi%nz=4+^0YW)K%*a3a8axu9z{i?E= zjCz1z7ftI-|25HD_LMuRI#>G`*$w#whE}Rw7{^7qVvq$6eFkU9Rsl?P>?5A&+WT${ zpF>(;42%7fiYXmcRkR8@UK$D^FBrlM6rd!^RBe4ioGxTw*?7!~C=+JN2Sf8kOclP% zTM%QvVzZ0vM#oE5pcB9^8^yrR@x+eKD@!PIakg6iWQeMVwsVI9p* z@f_cKhlo8$xN%g+b9&FN$A3C$`CA8ErG8X#4d||O2qDBJWEzlat-`_48Zx5Su?+&WM3De-sh(HG68<76v&Da~<$?GUML7u`d66i1 zpHlEE_Uzo$iR|vri@4#uNM%&v$5s-J*)d^>p8&PY)+*5I}Vr^ zi|nMm{La!+DoZa`I#S}qJ1BLYA=&KgIxSn~l__uzNK{5?ttpE^wTWFE*pbZgQSwovYiL%{p*_i9-rM?1wO#sDJmp z0Q}7&tzkh64xoB?{QT(rh$bsa)HG_Wam4eeL|W3KQM)j*>TN}7qjr(sC`=Jt+@4m^ zOoB5v|56npdb{qQdtiy~Zr4`q=0^X8C2?*8xZBJF2-gMm-7gYsA=fvmjM7RZL$x)j zifhCWF)ox_8g1t_{9sc3`pdtXe{W5h`L(6KzGCQp?ntj z;=-dDlc9`e#N=?<7kxUG=m%t!o?C4rD4K+g_5|Ji<4KXM4*BvL=&Gt^gl!yLxa}cN zPNV~tQHhPK;8citVHIN|jE7L5HJNd`PArfKc!7cy8T_yYVKPsRLR>E1D=O;51d5Brvy9RS4T6C*7nG zx$d2kMm&hKm8q8`#abTLgRncpk$4A_UenL)n&AKxs4%kOY-aFcwfuGt>+v>o%--Zj zJzjLee+gsYQmo7A;%O!V@J8J1C33E=Hu5-M0DXtNIoy%XHxfBhaCy9Cgd;8#{l`9? z*%{;-LrHp*CQXzf?Y0f%vVuq)ysV`zqUyRSTb$ZxR#}%pP1@3i+jNM+e`@U_)g`x7 zmsh;%Ni9-sh<%s1LM|*DJr6n_(1x4QsyZ*Mn;M#&?$KiiI}8HPA!LvWHVma7ixll_ z8!7(Or?1gS!W?sW7zVB85F?tD#c#6qG_gK+V_27q#rbheG25!;&Zv-t==n{9>Q2d6Ala?&B z?Xw`j+rr~FuiLiS?-N1G*S;LL#=&qx4c=g}E66`~mEWQJR~ePE-^ndIx^t&?1RUPw z#&^3lZQa8QAlxnmIJ?n<0(;|L^4J z;yHD6xVCkhM-Q|0of}@s7x^r%At~w4_e0B;yzD5NisUujyvPzXqTnlY;|bnp5bcEJ zu6HlZvPMGZ#`7t^mfQ`)|4udl`U3S)f!47yMGRV9m<@_Q0UBPn%j2NgkC|i(ZwF%+ z);<3>%lwH9KyB$X&VsfIwsn_OmMLED%Fx*rVCbFsr6{LDF0;6rGH*^~BEwytq-QNN zT<)R|UBhBtDI=_az={($D#fjy9^mtc;M7pp=f$(?Rc_m`pel;mt3=*|f9yvAxQw!E zfa$e^X0CRQneg|rCT9mSfPH}!sjBeo<(u!{@knK>7i1wMGmT_Y;??5vdJgf_%!di@ zuhB&^jJBMS+5>DDe!|=m3i@q8H~hMK$qi#nY!aiFv5fcDO@x;|LSwO6Y?b*;#;OyijBfE>pB{}CQOyhjgdtmCM}i!-!vzS*O`!#gZV zJpf~YkjF7H)w)p_i#;4V8#fp$A##ELJA)GqdLQ7}HrK9bgw*EiR!G8clVOtsi literal 0 HcmV?d00001 diff --git a/services/epub/templates/defaults.ts b/services/epub/templates/defaults.ts new file mode 100644 index 0000000..bc00acc --- /dev/null +++ b/services/epub/templates/defaults.ts @@ -0,0 +1,30 @@ +import { EpubTemplate } from '../types'; + +/** + * Default template for EPUB metadata + * This template can be customized by users to personalize their EPUB exports + */ +export const getDefaultTemplate = (): EpubTemplate => ({ + gratitudeMessage: `This translation was made possible through the remarkable capabilities of modern AI language models. We express our deep gratitude to the teams behind these technologies who have made creative translation accessible to everyone.`, + + projectDescription: `This e-book was generated using LexiconForge, an open-source AI translation platform that enables high-quality, creative translations of literature. The platform supports multiple AI providers and allows for collaborative refinement of translations.`, + + githubUrl: 'https://github.com/anantham/LexiconForge', + + additionalAcknowledgments: `Special thanks to the original authors whose creative works inspire these translations, and to the open-source community that makes tools like this possible. Translation is an art that bridges cultures and languages, bringing stories to new audiences worldwide.`, + + customFooter: '' +}); + +/** + * Creates a customizable template - users can override any field + */ +export const createCustomTemplate = (overrides: Partial): EpubTemplate => { + const def = getDefaultTemplate(); + const merge = (a: any, b: any): any => + Object.fromEntries(Object.keys({ ...a, ...b }).map(k => { + const av = (a as any)[k], bv = (b as any)[k]; + return [k, (av && typeof av === 'object' && bv && typeof bv === 'object') ? merge(av, bv) : (bv ?? av)]; + })); + return merge(def, overrides ?? {}); +}; diff --git a/services/epub/templates/novelConfig.ts b/services/epub/templates/novelConfig.ts new file mode 100644 index 0000000..4059236 --- /dev/null +++ b/services/epub/templates/novelConfig.ts @@ -0,0 +1,81 @@ +import { NovelConfig } from '../types'; + +/** + * Gets novel configuration based on URL or manual configuration + * This allows for novel-specific metadata like title, author, etc. + */ +export const getNovelConfig = (firstChapterUrl?: string, manualConfig?: Partial): NovelConfig => { + // Default configuration + const defaultConfig: NovelConfig = { + title: 'Translated Novel', + author: 'Unknown Author', + language: 'en', + originalLanguage: 'ja', + publisher: 'LexiconForge Community' + }; + + // Novel-specific configurations based on URL patterns + let novelSpecificConfig: Partial = {}; + + if (firstChapterUrl) { + if (firstChapterUrl.includes('kakuyomu.jp')) { + // Enhanced configuration based on Novel Updates data + novelSpecificConfig = { + title: 'The Reincarnation of the Strongest Exorcist in Another World', + author: 'Kosuzu Kiichi', + originalTitle: '最強陰陽師の異世界転生記 〜下僕の妖怪どもに比べてモンスターが弱すぎるんだが〜', + description: 'Haruyoshi, the strongest exorcist was on the verge of death after the betrayal of his companions. Hoping to be happy in the next life, he tried the secret technique of reincarnation and was sent to a different world! Born into a family of magicians, the magic he failed to inherit was nothing compared to his previous skills as an exorcist. "Who needs magic? I\'ll survive in this world with my old techniques!"', + genre: 'Action, Adventure, Fantasy, Harem, Romance', + originalLanguage: 'ja', + seriesName: 'The Reincarnation of the Strongest Exorcist', + volumeNumber: 1, + isbn: 'urn:uuid:strongest-exorcist-v1', + publisher: 'Futabasha (Original) / J-Novel Club (English)', + translationNotes: 'Translated from Japanese web novel published on Kakuyomu and Syosetu. Originally published in 2018 by Kosuzu Kiichi. Licensed by J-Novel Club for English publication. This is an AI-powered fan translation for educational and entertainment purposes.' + }; + } else if (firstChapterUrl.includes('booktoki468.com')) { + novelSpecificConfig = { + title: 'Dungeon Defense', + author: 'Yoo Heonhwa', + originalTitle: '던전 디펜스', + description: 'A dark fantasy novel about survival and strategy in a dungeon world where the protagonist must use cunning and manipulation to survive against overwhelming odds.', + genre: 'Dark Fantasy, Strategy, Psychological', + originalLanguage: 'ko', + seriesName: 'Dungeon Defense', + volumeNumber: 1, + isbn: 'urn:uuid:dungeon-defense-v1', + publisher: 'BookToki (Original)', + translationNotes: 'Translated from Korean web novel published on BookToki. Known for its complex psychological elements and strategic gameplay mechanics.' + }; + } else if (firstChapterUrl.includes('syosetu.com') || firstChapterUrl.includes('ncode.syosetu.com')) { + // Syosetu - Japanese web novel platform + novelSpecificConfig = { + title: 'Web Novel from Syosetu', + author: 'Unknown Syosetu Author', + originalTitle: '小説家になろう作品', + description: 'Japanese web novel from the popular Syosetu platform.', + genre: 'Web Novel, Japanese Literature', + originalLanguage: 'ja', + publisher: 'Syosetu (Original)', + translationNotes: 'Translated from Japanese web novel published on Syosetu (Shōsetsuka ni Narō).' + }; + } else if (firstChapterUrl.includes('novelupdates.com')) { + // Novel Updates - aggregator site + novelSpecificConfig = { + title: 'Novel from Novel Updates', + author: 'Unknown Author', + description: 'Novel sourced from Novel Updates database.', + genre: 'Various', + publisher: 'Novel Updates Community', + translationNotes: 'Novel information sourced from Novel Updates community database.' + }; + } + // Add more novel configurations as needed + } + + return { + ...defaultConfig, + ...novelSpecificConfig, + ...manualConfig + }; +}; diff --git a/services/epub/types.ts b/services/epub/types.ts index f11ceb1..6553092 100644 --- a/services/epub/types.ts +++ b/services/epub/types.ts @@ -1,272 +1,116 @@ -/** - * EPUB Export Pipeline Types - * - * Defines the contracts between each module in the export pipeline: - * 1. Data Collector → Collected Chapters - * 2. Asset Resolver → Resolved Assets - * 3. Content Builder → HTML/Manifest - * 4. Package Builder → Final EPUB Blob - * 5. Export Service → Orchestration - */ +import { AppSettings } from '../../types'; -import type { AppSettings, ImageCacheKey } from '../../types'; - -// ============================================================================ -// EXPORT OPTIONS -// ============================================================================ - -export interface EpubExportOptions { - /** Chapter ordering: by number or by navigation links */ - order: 'number' | 'navigation'; - - /** Include title page */ - includeTitlePage: boolean; - - /** Include statistics page at end */ - includeStatsPage: boolean; - - /** Enable HTML repair on translated content prior to export */ - enableHtmlRepair?: boolean; - - /** Optional list of chapter URLs to include (legacy pipeline support) */ - chapterUrls?: string[]; - - /** Optional manual configuration overrides (legacy pipeline support) */ - manualConfig?: unknown; - - /** Optional custom template identifier (legacy pipeline support) */ - customTemplate?: unknown; - - /** Custom EPUB metadata overrides */ - metadata?: { - gratitudeMessage?: string; - projectDescription?: string; - footer?: string | null; - }; - - /** Settings snapshot for statistics */ - settings: AppSettings; -} - -// ============================================================================ -// DATA COLLECTOR OUTPUT -// ============================================================================ - -export interface CollectedChapter { - /** Stable chapter ID */ - id: string; - - /** Chapter number (for ordering) */ - chapterNumber?: number; - - /** Original title */ +export interface ChapterForEpub { title: string; - - /** Original content (HTML) */ + originalTitle?: string; content: string; - - /** Translated title */ - translatedTitle?: string; - - /** Translated content (HTML) */ + originalUrl: string; + url?: string; + translatedTitle: string; translatedContent?: string; - - /** Footnotes */ - footnotes: Array<{ marker: string; text: string }>; - - /** Image references (not yet resolved) */ - imageReferences: Array<{ - placementMarker: string; - prompt: string; - cacheKey?: ImageCacheKey; - base64Fallback?: string; // Legacy data - }>; - - /** Translation metadata for statistics */ - translationMeta?: { - provider: string; - model: string; - cost: number; - tokens: number; - requestTime: number; - }; - - /** Navigation URLs */ prevUrl?: string | null; nextUrl?: string | null; -} - -export interface CollectedData { - chapters: CollectedChapter[]; - - /** Session metadata */ - metadata: { - novelTitle?: string; - totalChapters: number; - translatedChapters: number; - exportDate: string; + usageMetrics: { + totalTokens: number; + promptTokens: number; + completionTokens: number; + estimatedCost: number; + requestTime: number; + provider: string; + model: string; }; - - /** Warnings from collection phase */ - warnings: Array<{ - type: 'missing-translation' | 'missing-content' | 'ordering-gap'; - chapterId: string; - message: string; - }>; -} - -// ============================================================================ -// ASSET RESOLVER OUTPUT -// ============================================================================ - -export interface ResolvedAsset { - /** Internal asset ID (e.g., "img-ch1-ILLUSTRATION-1") */ - id: string; - - /** MIME type (e.g., "image/png", "audio/mpeg") */ - mimeType: string; - - /** Binary data */ - data: ArrayBuffer; - - /** File extension for manifest (e.g., "png", "mp3") */ - extension: string; - - /** Original source reference */ - sourceRef: { - chapterId: string; + images: Array<{ marker: string; - type: 'image' | 'audio'; - }; -} - -export interface ResolvedChapter extends CollectedChapter { - /** Image references now have resolved asset IDs */ - imageReferences: Array<{ - placementMarker: string; + imageData: string; // base64 data URL prompt: string; - assetId?: string; // Set by resolver if asset found - missing?: boolean; // True if cache miss }>; -} - -export interface ResolvedAssets { - chapters: ResolvedChapter[]; - assets: ResolvedAsset[]; - - /** Asset resolution warnings */ - warnings: Array<{ - type: 'cache-miss' | 'invalid-data' | 'conversion-failed'; - assetId: string; - chapterId: string; + footnotes?: Array<{ marker: string; - message: string; + text: string; }>; } -// ============================================================================ -// CONTENT BUILDER OUTPUT -// ============================================================================ - -export interface EpubManifestItem { - id: string; - href: string; - mediaType: string; - properties?: string; +export interface TranslationStats { + totalCost: number; + totalTime: number; + totalTokens: number; + chapterCount: number; + imageCount: number; + providerBreakdown: Record; + modelBreakdown: Record; } -export interface EpubSpineItem { - idref: string; - linear?: 'yes' | 'no'; +export interface TelemetryInsights { + totalEvents: number; + sessionDurationMs: number; + navigation: { count: number; totalMs: number; averageMs: number }; + hydration: { count: number; totalMs: number; averageMs: number }; + chapterReady: { count: number; totalMs: number; averageMs: number }; + exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; } -export interface EpubNavItem { +export interface NovelConfig { title: string; - href: string; - children?: EpubNavItem[]; + author: string; + originalTitle?: string; + description?: string; + genre?: string; + language: string; + originalLanguage?: string; + coverImage?: string; // base64 or URL + seriesName?: string; + volumeNumber?: number; + isbn?: string; + publisher?: string; + translationNotes?: string; } -export interface BuiltContent { - /** Per-chapter XHTML files */ - chapterFiles: Array<{ - filename: string; // e.g., "chapter-001.xhtml" - content: string; // XHTML string - chapterId: string; - }>; - - /** Optional title page XHTML */ - titlePage?: { - filename: string; - content: string; - }; - - /** Optional statistics page XHTML */ - statsPage?: { - filename: string; - content: string; - }; - - /** OPF manifest items (for content.opf) */ - manifestItems: EpubManifestItem[]; - - /** OPF spine items (reading order) */ - spineItems: EpubSpineItem[]; - - /** Navigation document structure */ - navigation: EpubNavItem[]; - - /** Package metadata */ - packageMeta: { - title: string; - language: string; - identifier: string; - date: string; - }; +export interface EpubTemplate { + gratitudeMessage?: string; + projectDescription?: string; + githubUrl?: string; + additionalAcknowledgments?: string; + customFooter?: string; } -// ============================================================================ -// PACKAGE BUILDER OUTPUT -// ============================================================================ - -export interface EpubPackage { - /** Final EPUB as blob */ - blob: Blob; - - /** File size in bytes */ - sizeBytes: number; - - /** Package validation result */ - validation: { - valid: boolean; - errors: string[]; - warnings: string[]; - }; +export interface EpubExportOptions { + title?: string; + author?: string; + description?: string; + chapters: ChapterForEpub[]; + settings: AppSettings; + template?: EpubTemplate; + novelConfig?: NovelConfig; + telemetryInsights?: TelemetryInsights; + includeTitlePage?: boolean; + includeStatsPage?: boolean; + customTemplate?: any; + manualConfig?: any; + chapterUrls?: string[]; } -// ============================================================================ -// EXPORT SERVICE (ORCHESTRATION) -// ============================================================================ - -export interface ExportProgress { - phase: 'collecting' | 'resolving' | 'building' | 'packaging' | 'complete' | 'error'; - percent: number; // 0-100 - message: string; - detail?: string; +export interface EpubChapter { + id: string; + title: string; + xhtml: string; + href: string; } -export type ProgressCallback = (progress: ExportProgress) => void; - -export interface ExportResult { - success: boolean; - blob?: Blob; - error?: string; - - /** Summary statistics */ - stats: { - totalChapters: number; - assetsResolved: number; - assetsMissing: number; - warnings: number; - durationMs: number; - }; -} +export interface EpubMeta { + title: string; + author: string; + description?: string; + language?: string; + identifier?: string; + publisher?: string; +} \ No newline at end of file diff --git a/services/epubService.ts b/services/epubService.ts index 1d07966..9984356 100644 --- a/services/epubService.ts +++ b/services/epubService.ts @@ -1,1280 +1,55 @@ -import { SessionChapterData, AppSettings } from '../types'; -import JSZip from 'jszip'; -import { toStrictXhtml } from './translate/HtmlSanitizer'; - -// XHTML/XML namespaces used for strict XML serialization -const XHTML_NS = 'http://www.w3.org/1999/xhtml'; -const XML_NS = 'http://www.w3.org/XML/1998/namespace'; -const EPUB_NS = 'http://www.idpf.org/2007/ops'; -const XLINK_NS = 'http://www.w3.org/1999/xlink'; - -// Simplified XML Name validation (sufficient for XHTML attribute names) -const XML_NAME = /^[A-Za-z_][A-Za-z0-9._:-]*$/; - -// Basic bans for unsafe attributes -function isBannedAttr(name: string) { - return name.startsWith('on') || name === 'srcdoc'; -} - -// Very lightweight CSS sanitizer; keep as a single attribute -function sanitizeStyle(value: string) { - const v = (value ?? '').replace(/[\u0000-\u001F\u007F]/g, ''); - if (/url\s*\(\s*javascript:/i.test(v)) return ''; - if (/expression\s*\(/i.test(v)) return ''; - return v.trim(); -} - -function setAttrNS(el: Element, name: string, value: string) { - if (name === 'xml:lang') { el.setAttributeNS(XML_NS, name, value); return; } - if (name.startsWith('epub:')) { el.setAttributeNS(EPUB_NS, name, value); return; } - if (name.startsWith('xlink:')) { el.setAttributeNS(XLINK_NS, name, value); return; } - el.setAttribute(name, value); -} - -function copyAttributesSafely(srcEl: Element, dstEl: Element) { - for (const attr of Array.from(srcEl.attributes)) { - let name = attr.name; - let value = attr.value ?? ''; - - // Keep style as a single attribute; do not expand/split - if (name.toLowerCase() === 'style') { - const s = sanitizeStyle(value); - if (s) dstEl.setAttribute('style', s); - continue; - } - - // Drop unsafe attributes - if (isBannedAttr(name)) continue; - - // Validate XML name to avoid InvalidCharacterError (e.g., 'down;') - if (!XML_NAME.test(name)) { - try { console.warn('[EPUB XClone] Dropping invalid attribute', name, 'on <' + srcEl.tagName + '>'); } catch {} - continue; - } - - // reject unknown namespace prefixes (avoid unbound prefixes) - if (name.includes(':')) { - const [prefix] = name.split(':', 1); - const ok = prefix === 'xml' || prefix === 'epub' || prefix === 'xlink'; - if (!ok) continue; - } - - // Normalize non-namespaced names to lowercase - if (!name.includes(':')) name = name.toLowerCase(); - - try { - setAttrNS(dstEl, name, value); - } catch (e) { - try { - const snippet = (srcEl as any).outerHTML ? (srcEl as any).outerHTML.slice(0, 160).replace(/\s+/g, ' ') : `<${srcEl.tagName}>`; - console.warn('[EPUB XClone] Could not set attribute', name, 'value=', value, 'on', snippet, e); - } catch {} - // Continue without throwing - } - } -} - -// Clone an HTML node tree into an XHTML XMLDocument parent -function cloneIntoXhtml(srcNode: Node, xdoc: XMLDocument, dstParent: Element) { - switch (srcNode.nodeType) { - case Node.ELEMENT_NODE: { - const srcEl = srcNode as Element; - // Lowercase localName for XHTML consistency; guard invalid names - const name = srcEl.localName.toLowerCase(); - const isValidXmlLocalName = /^[A-Za-z_][A-Za-z0-9._-]*$/.test(name); - if (!isValidXmlLocalName) { - // Skip invalid element; clone its children directly into parent - for (const child of Array.from(srcEl.childNodes)) { - cloneIntoXhtml(child, xdoc, dstParent); - } - break; - } - const el = xdoc.createElementNS(XHTML_NS, name); - // Copy attributes safely (validated + namespaced) - copyAttributesSafely(srcEl, el); - // Ensure has alt for accessibility nicety - if (el.localName === 'img' && !el.hasAttribute('alt')) { - el.setAttribute('alt', ''); - } - // Avoid scripts in EPUB content - if (el.localName !== 'script') { - for (const child of Array.from(srcEl.childNodes)) { - cloneIntoXhtml(child, xdoc, el); - } - } - dstParent.appendChild(el); - break; - } - case Node.TEXT_NODE: { - dstParent.appendChild(xdoc.createTextNode((srcNode as Text).data)); - break; - } - // Omit comments/CDATA by default for chapters - default: - break; - } -} - -// Convert an HTML fragment string into serialized XHTML fragment -function htmlFragmentToXhtml(fragmentHtml: string): string { - // Repair common broken void tags like then the quote remains as text - fragmentHtml = fragmentHtml - .replace(/') - .replace(/') - .replace(/'); - // 1) Tolerant parse as HTML - const htmlDoc = new DOMParser().parseFromString(fragmentHtml, 'text/html'); - // 2) Create fresh XHTML document and a container - const xdoc = document.implementation.createDocument(XHTML_NS, 'html', null); - const htmlEl = xdoc.documentElement; - // Bind common namespaces used by EPUB content - htmlEl.setAttribute('xmlns:epub', EPUB_NS); - // Default language; may be overridden per element via xml:lang during cloning - if (!htmlEl.hasAttribute('xml:lang')) htmlEl.setAttributeNS(XML_NS, 'xml:lang', 'en'); - const body = xdoc.createElementNS(XHTML_NS, 'body'); - htmlEl.appendChild(body); - // 3) Clone children into XHTML body - for (const node of Array.from(htmlDoc.body.childNodes)) { - cloneIntoXhtml(node, xdoc, body); - } - // 4) Serialize children individually to avoid wrapping markup - const serializer = new XMLSerializer(); - const parts: string[] = []; - for (const child of Array.from(body.childNodes)) { - parts.push(serializer.serializeToString(child as any)); - } - let xhtml = parts.join(''); - // 5) Prefer numeric nbsp entity for max compatibility - xhtml = xhtml.replace(/\u00A0/g, ' '); - return xhtml; -} - -// Very small allowlist sanitizer for inline/basic block tags used in chapters -function sanitizeHtmlAllowlist(html: string): string { - const allowedTags = new Set([ - 'i','em','b','strong','u','s','br','sup','sub','a','p','ul','ol','li','span' - ]); - const doc = new DOMParser().parseFromString(html, 'text/html'); - const body = doc.body; - - const unwrapNode = (node: Element) => { - const parent = node.parentNode; - if (!parent) return; - while (node.firstChild) parent.insertBefore(node.firstChild, node); - parent.removeChild(node); - }; - - const isSafeHref = (href: string): boolean => { - try { - const url = new URL(href, 'https://example.com'); - const proto = (url.protocol || '').toLowerCase(); - return proto === 'http:' || proto === 'https:' || proto === 'mailto:'; - } catch { return false; } - }; - - const sanitizeEl = (el: Element) => { - // Copy array since we'll mutate children - for (const child of Array.from(el.childNodes)) { - if (child.nodeType === Node.COMMENT_NODE) { - el.removeChild(child); - continue; - } - if (child.nodeType === Node.ELEMENT_NODE) { - const c = child as Element; - const tag = c.tagName.toLowerCase(); - if (!allowedTags.has(tag)) { - // unwrap unknown element, keep its children - unwrapNode(c); - continue; - } - // Strip disallowed attributes - for (const attr of Array.from(c.attributes)) { - const name = attr.name.toLowerCase(); - const value = attr.value; - const isEvent = name.startsWith('on'); - if (isEvent || name === 'style') { c.removeAttribute(attr.name); continue; } - if (tag === 'a') { - if (name === 'href') { - if (!isSafeHref(value)) c.removeAttribute('href'); - continue; - } - if (name === 'title') continue; - // drop everything else on - c.removeAttribute(attr.name); - continue; - } - if (tag === 'span') { - // Keep our placeholders only - if (name === 'data-illu' || name === 'data-fn') continue; - c.removeAttribute(attr.name); - continue; - } - // For other allowed tags: drop all attributes - c.removeAttribute(attr.name); - } - sanitizeEl(c); - } - } - }; - sanitizeEl(body); - return body.innerHTML; -} - -// Replace newline characters in text nodes with
elements for display parity -function convertNewlinesToBrInElement(root: Element) { - const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT, null); - const textNodes: Text[] = []; - let node: Node | null; - while ((node = walker.nextNode())) { - const t = node as Text; - if (t.data.includes('\n')) textNodes.push(t); - } - for (const t of textNodes) { - const parts = t.data.split(/\n/); - const frag = document.createDocumentFragment(); - parts.forEach((part, idx) => { - if (part) frag.appendChild(document.createTextNode(part)); - if (idx < parts.length - 1) frag.appendChild(document.createElement('br')); - }); - t.parentNode?.replaceChild(frag, t); - } -} - -export interface ChapterForEpub { - title: string; - originalTitle?: string; - content: string; - originalUrl: string; - url?: string; - translatedTitle: string; - translatedContent?: string; - prevUrl?: string | null; - nextUrl?: string | null; - usageMetrics: { - totalTokens: number; - promptTokens: number; - completionTokens: number; - estimatedCost: number; - requestTime: number; - provider: string; - model: string; - }; - images: Array<{ - marker: string; - imageData: string; // base64 data URL - prompt: string; - }>; - footnotes?: Array<{ - marker: string; - text: string; - }>; -} - -export interface TranslationStats { - totalCost: number; - totalTime: number; - totalTokens: number; - chapterCount: number; - imageCount: number; - providerBreakdown: Record; - modelBreakdown: Record; -} - -export interface TelemetryInsights { - totalEvents: number; - sessionDurationMs: number; - navigation: { count: number; totalMs: number; averageMs: number }; - hydration: { count: number; totalMs: number; averageMs: number }; - chapterReady: { count: number; totalMs: number; averageMs: number }; - exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; -} - -export interface NovelConfig { - title: string; - author: string; - originalTitle?: string; - description?: string; - genre?: string; - language: string; - originalLanguage?: string; - coverImage?: string; // base64 or URL - seriesName?: string; - volumeNumber?: number; - isbn?: string; - publisher?: string; - translationNotes?: string; -} - -export interface EpubTemplate { - gratitudeMessage?: string; - projectDescription?: string; - githubUrl?: string; - additionalAcknowledgments?: string; - customFooter?: string; -} - -export interface EpubExportOptions { - title?: string; - author?: string; - description?: string; - chapters: ChapterForEpub[]; - settings: AppSettings; - template?: EpubTemplate; - novelConfig?: NovelConfig; - telemetryInsights?: TelemetryInsights; - includeTitlePage?: boolean; - includeStatsPage?: boolean; - customTemplate?: any; - manualConfig?: any; - chapterUrls?: string[]; -} - -/** - * Collects active version chapters from session data for EPUB export - * Uses activeVersion tracking to determine which translation to include - */ -export const collectActiveVersions = ( - sessionData: Record, - urlHistory: string[] -): ChapterForEpub[] => { - const chapters: ChapterForEpub[] = []; - - // Use urlHistory for ordering, but also include any chapters not in history - // First, process chapters in urlHistory order to maintain chronological sequence - const processedUrls = new Set(); - - // Add chapters from urlHistory first (in order) - for (const url of urlHistory) { - if (sessionData[url]?.chapter && sessionData[url]?.translationResult) { - processedUrls.add(url); - const data = sessionData[url]; - chapters.push(createChapterForEpub(data, url)); - } - } - - // Then add any remaining chapters not in urlHistory (sorted by URL for consistency) - const remainingUrls = Object.keys(sessionData) - .filter(url => !processedUrls.has(url)) - .sort(); - - for (const url of remainingUrls) { - const data = sessionData[url]; - if (!data?.chapter || !data?.translationResult) { - console.log(`[EPUBService] Skipping ${url} - missing chapter or translation result`); - continue; - } - - chapters.push(createChapterForEpub(data, url)); - } - - console.log(`[EPUBService] Prepared ${chapters.length} chapters for EPUB in chronological order`); - return chapters; -}; - -/** - * Creates a ChapterForEpub object from session data - */ -const createChapterForEpub = (data: any, url: string): ChapterForEpub => { - // Create default metrics for chapters missing usage data - let metrics = data.translationResult.usageMetrics; - - if (!metrics) { - console.warn(`[EPUBService] Chapter ${url} missing usageMetrics - using defaults for statistics`); - metrics = { - totalTokens: 0, - promptTokens: 0, - completionTokens: 0, - estimatedCost: 0, - requestTime: 0, - provider: 'Unknown', - model: 'Unknown' - }; - } else { - // Validate and fix invalid metrics values - const fixedMetrics = { - totalTokens: typeof metrics.totalTokens === 'number' && isFinite(metrics.totalTokens) ? metrics.totalTokens : 0, - promptTokens: typeof metrics.promptTokens === 'number' && isFinite(metrics.promptTokens) ? metrics.promptTokens : 0, - completionTokens: typeof metrics.completionTokens === 'number' && isFinite(metrics.completionTokens) ? metrics.completionTokens : 0, - estimatedCost: typeof metrics.estimatedCost === 'number' && isFinite(metrics.estimatedCost) ? metrics.estimatedCost : 0, - requestTime: typeof metrics.requestTime === 'number' && isFinite(metrics.requestTime) ? metrics.requestTime : 0, - provider: typeof metrics.provider === 'string' ? metrics.provider : 'Unknown', - model: typeof metrics.model === 'string' ? metrics.model : 'Unknown' - }; - - // Check if we had to fix any values - const hadInvalidData = Object.keys(metrics).some(key => - metrics[key] !== fixedMetrics[key] - ); - - if (hadInvalidData) { - console.warn(`[EPUBService] Chapter ${url} had invalid usageMetrics - fixed for statistics:`, { - original: metrics, - fixed: fixedMetrics - }); - } - - metrics = fixedMetrics; - } - - // Get images from translation result - const images = data.translationResult.suggestedIllustrations?.map(illust => ({ - marker: illust.placementMarker, - imageData: illust.url || '', // This should be base64 data from generation - prompt: illust.imagePrompt - })) || []; - - // Get footnotes from translation result - const footnotes = data.translationResult.footnotes?.map(footnote => ({ - marker: footnote.marker, - text: footnote.text - })) || []; - - const translatedContent = data.translationResult.translation || ''; - - return { - title: data.chapter.title, - originalTitle: data.chapter.originalTitle || data.chapter.title, - content: data.chapter.content, - originalUrl: url, - url, - translatedTitle: data.translationResult.translatedTitle, - translatedContent, - prevUrl: data.chapter.prevUrl ?? null, - nextUrl: data.chapter.nextUrl ?? null, - usageMetrics: { - totalTokens: metrics.totalTokens, - promptTokens: metrics.promptTokens, - completionTokens: metrics.completionTokens, - estimatedCost: metrics.estimatedCost, - requestTime: metrics.requestTime, - provider: metrics.provider, - model: metrics.model, - }, - images: images.filter(img => img.imageData), // Only include images with data - footnotes: footnotes - }; -}; - -/** - * Calculates comprehensive statistics from collected chapters - */ -export const calculateTranslationStats = (chapters: ChapterForEpub[]): TranslationStats => { - const stats: TranslationStats = { - totalCost: 0, - totalTime: 0, - totalTokens: 0, - chapterCount: chapters.length, - imageCount: 0, - providerBreakdown: {}, - modelBreakdown: {} - }; - - chapters.forEach(chapter => { - const metrics = chapter.usageMetrics; - - // Aggregate totals - stats.totalCost += metrics.estimatedCost; - stats.totalTime += metrics.requestTime; - stats.totalTokens += metrics.totalTokens; - stats.imageCount += chapter.images.length; - - // Provider breakdown - if (!stats.providerBreakdown[metrics.provider]) { - stats.providerBreakdown[metrics.provider] = { - chapters: 0, - cost: 0, - time: 0, - tokens: 0 - }; - } - const providerStats = stats.providerBreakdown[metrics.provider]; - providerStats.chapters += 1; - providerStats.cost += metrics.estimatedCost; - providerStats.time += metrics.requestTime; - providerStats.tokens += metrics.totalTokens; - - // Model breakdown - if (!stats.modelBreakdown[metrics.model]) { - stats.modelBreakdown[metrics.model] = { - chapters: 0, - cost: 0, - time: 0, - tokens: 0 - }; - } - const modelStats = stats.modelBreakdown[metrics.model]; - modelStats.chapters += 1; - modelStats.cost += metrics.estimatedCost; - modelStats.time += metrics.requestTime; - modelStats.tokens += metrics.totalTokens; - }); - - return stats; -}; - -/** - * Default template for EPUB metadata - * This template can be customized by users to personalize their EPUB exports - */ -export const getDefaultTemplate = ():EpubTemplate => ({ - gratitudeMessage: `This translation was made possible through the remarkable capabilities of modern AI language models. We express our deep gratitude to the teams behind these technologies who have made creative translation accessible to everyone.`, - - projectDescription: `This e-book was generated using LexiconForge, an open-source AI translation platform that enables high-quality, creative translations of literature. The platform supports multiple AI providers and allows for collaborative refinement of translations.`, - - githubUrl: 'https://github.com/anantham/LexiconForge', - - additionalAcknowledgments: `Special thanks to the original authors whose creative works inspire these translations, and to the open-source community that makes tools like this possible. Translation is an art that bridges cultures and languages, bringing stories to new audiences worldwide.`, - - customFooter: '' -}); - -/** - * Creates a customizable template - users can override any field - * Example usage: - * const myTemplate = createCustomTemplate({ - * gratitudeMessage: 'My custom gratitude message...', - * githubUrl: 'https://github.com/myuser/myproject' - * }); - */ -export const createCustomTemplate = (overrides: Partial): EpubTemplate => { - const def = getDefaultTemplate(); - const merge = (a: any, b: any): any => - Object.fromEntries(Object.keys({ ...a, ...b }).map(k => { - const av = (a as any)[k], bv = (b as any)[k]; - return [k, (av && typeof av === 'object' && bv && typeof bv === 'object') ? merge(av, bv) : (bv ?? av)]; - })); - return merge(def, overrides ?? {}); -}; - -/** - * Gets novel configuration based on URL or manual configuration - * This allows for novel-specific metadata like title, author, etc. - */ -export const getNovelConfig = (firstChapterUrl?: string, manualConfig?: Partial): NovelConfig => { - // Default configuration - const defaultConfig: NovelConfig = { - title: 'Translated Novel', - author: 'Unknown Author', - language: 'en', - originalLanguage: 'ja', - publisher: 'LexiconForge Community' - }; - - // Novel-specific configurations based on URL patterns - let novelSpecificConfig: Partial = {}; - - if (firstChapterUrl) { - if (firstChapterUrl.includes('kakuyomu.jp')) { - // Enhanced configuration based on Novel Updates data - novelSpecificConfig = { - title: 'The Reincarnation of the Strongest Exorcist in Another World', - author: 'Kosuzu Kiichi', - originalTitle: '最強陰陽師の異世界転生記 〜下僕の妖怪どもに比べてモンスターが弱すぎるんだが〜', - description: 'Haruyoshi, the strongest exorcist was on the verge of death after the betrayal of his companions. Hoping to be happy in the next life, he tried the secret technique of reincarnation and was sent to a different world! Born into a family of magicians, the magic he failed to inherit was nothing compared to his previous skills as an exorcist. "Who needs magic? I\'ll survive in this world with my old techniques!"', - genre: 'Action, Adventure, Fantasy, Harem, Romance', - originalLanguage: 'ja', - seriesName: 'The Reincarnation of the Strongest Exorcist', - volumeNumber: 1, - isbn: 'urn:uuid:strongest-exorcist-v1', - publisher: 'Futabasha (Original) / J-Novel Club (English)', - translationNotes: 'Translated from Japanese web novel published on Kakuyomu and Syosetu. Originally published in 2018 by Kosuzu Kiichi. Licensed by J-Novel Club for English publication. This is an AI-powered fan translation for educational and entertainment purposes.' - }; - } else if (firstChapterUrl.includes('booktoki468.com')) { - novelSpecificConfig = { - title: 'Dungeon Defense', - author: 'Yoo Heonhwa', - originalTitle: '던전 디펜스', - description: 'A dark fantasy novel about survival and strategy in a dungeon world where the protagonist must use cunning and manipulation to survive against overwhelming odds.', - genre: 'Dark Fantasy, Strategy, Psychological', - originalLanguage: 'ko', - seriesName: 'Dungeon Defense', - volumeNumber: 1, - isbn: 'urn:uuid:dungeon-defense-v1', - publisher: 'BookToki (Original)', - translationNotes: 'Translated from Korean web novel published on BookToki. Known for its complex psychological elements and strategic gameplay mechanics.' - }; - } else if (firstChapterUrl.includes('syosetu.com') || firstChapterUrl.includes('ncode.syosetu.com')) { - // Syosetu - Japanese web novel platform - novelSpecificConfig = { - title: 'Web Novel from Syosetu', - author: 'Unknown Syosetu Author', - originalTitle: '小説家になろう作品', - description: 'Japanese web novel from the popular Syosetu platform.', - genre: 'Web Novel, Japanese Literature', - originalLanguage: 'ja', - publisher: 'Syosetu (Original)', - translationNotes: 'Translated from Japanese web novel published on Syosetu (Shōsetsuka ni Narō).' - }; - } else if (firstChapterUrl.includes('novelupdates.com')) { - // Novel Updates - aggregator site - novelSpecificConfig = { - title: 'Novel from Novel Updates', - author: 'Unknown Author', - description: 'Novel sourced from Novel Updates database.', - genre: 'Various', - publisher: 'Novel Updates Community', - translationNotes: 'Novel information sourced from Novel Updates community database.' - }; - } - // Add more novel configurations as needed - } - - return { - ...defaultConfig, - ...novelSpecificConfig, - ...manualConfig - }; -}; - -/** - * Generates a professional title page using novel metadata - */ -const generateTitlePage = (novelConfig: NovelConfig, stats: TranslationStats): string => { - let titlePageHtml = `
-`; - - // Main title - titlePageHtml += `

${escapeXml(novelConfig.title)}

-`; - - // Original title (if different) - if (novelConfig.originalTitle && novelConfig.originalTitle !== novelConfig.title) { - titlePageHtml += `
${escapeXml(novelConfig.originalTitle)}
-`; - } - - // Author - titlePageHtml += `
by ${escapeXml(novelConfig.author)}
-`; - - // Metadata section - titlePageHtml += ` -`; // metadata - titlePageHtml += `
-`; // title-page - - return titlePageHtml; -}; - -/** - * Generates a comprehensive table of contents page with navigation links - */ -const generateTableOfContents = (chapters: ChapterForEpub[], includeStatsPage: boolean): string => { - let tocHtml = `

Table of Contents

\n\n`; - - tocHtml += `
-`; - tocHtml += `

This translation contains ${chapters.length} chapters

-`; - tocHtml += `
\n\n`; - - tocHtml += `
    -`; - - chapters.forEach((chapter, index) => { - const chapterTitle = chapter.translatedTitle || chapter.title || `Chapter ${index + 1}`; - const chapterHref = `chapter-${String(index + 1).padStart(4, '0')}.xhtml`; - - tocHtml += `
  1. -`; - tocHtml += ` ${escapeXml(chapterTitle)} -`; - tocHtml += `
    -`; - tocHtml += ` Translated with ${escapeXml(chapter.usageMetrics.provider)} ${escapeXml(chapter.usageMetrics.model)} -`; - if (chapter.images && chapter.images.length > 0) { - tocHtml += ` • ${chapter.images.length} illustration${chapter.images.length > 1 ? 's' : ''}`; - } - if (chapter.footnotes && chapter.footnotes.length > 0) { - tocHtml += ` • ${chapter.footnotes.length} footnote${chapter.footnotes.length > 1 ? 's' : ''}`; - } - tocHtml += `
    -`; - tocHtml += `
  2. -`; - }); - - // Optionally include special sections at the end - if (includeStatsPage) { - tocHtml += `
  3. -`; - tocHtml += ` Acknowledgments -`; - tocHtml += `
  4. -`; - } - tocHtml += `
-`; - - return tocHtml; -}; - -/** - * Generates a detailed statistics and acknowledgments page - */ -const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { - if (!telemetry) return ''; - - const formatMs = (ms: number): string => { - if (!Number.isFinite(ms)) return '—'; - if (ms < 1000) return `${ms.toFixed(0)} ms`; - const seconds = ms / 1000; - if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; - const minutes = seconds / 60; - if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; - const hours = minutes / 60; - return `${hours.toFixed(2)} h`; - }; - - const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { - if (!data || data.count === 0) return ''; - return ` - - ${label} - ${data.count} - ${formatMs(data.totalMs)} - ${formatMs(data.averageMs)} - `; - }; - - const rows = [ - renderRow('Navigation requests', telemetry.navigation), - renderRow('IndexedDB hydration', telemetry.hydration), - renderRow('Chapter ready-to-read', telemetry.chapterReady), - renderRow('JSON exports', telemetry.exports?.json), - renderRow('EPUB exports', telemetry.exports?.epub) - ].filter(Boolean).join(''); - - let html = `
-`; - html += `

Session Insights

-`; - html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

-`; - html += `
-`; - html += `
-`; - html += `
${telemetry.totalEvents.toLocaleString()}
-`; - html += `
Telemetry Events
-`; - html += `
-`; - html += `
-`; - html += `
${formatMs(telemetry.sessionDurationMs)}
-`; - html += `
Session Duration
-`; - html += `
-`; - html += `
-`; - - if (rows) { - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += rows; - html += ` -`; - html += `
ActivityOccurrencesTotal DurationAverage Duration
-`; - } - - html += `
- -`; - return html; -}; - -const generateStatsAndAcknowledgments = (stats: TranslationStats, template: EpubTemplate, telemetry?: TelemetryInsights): string => { - let html = `

Acknowledgments

\\n\\n`; - - // Project description - html += `
-`; - html += `

About This Translation

-`; - html += `

${escapeXml(template.projectDescription || '')}

-`; - if (template.githubUrl) { - html += `

Source Code: ${escapeXml(template.githubUrl)}

-`; - } - html += `
\n\n`; - - // Translation statistics - html += `
-`; - html += `

Translation Statistics

-`; - - html += `
-`; - html += `
-`; - html += `
${stats.chapterCount}
-`; - html += `
Chapters
-`; - html += `
-`; - html += `
-`; - html += `
$${stats.totalCost.toFixed(4)}
-`; - html += `
Total Cost
-`; - html += `
-`; - html += `
-`; - html += `
${Math.round(stats.totalTime)}s
-`; - html += `
Total Time
-`; - html += `
-`; - html += `
-`; - html += `
${stats.totalTokens.toLocaleString()}
-`; - html += `
Total Tokens
-`; - html += `
-`; - if (stats.imageCount > 0) { - html += `
-`; - html += `
${stats.imageCount}
-`; - html += `
Images Generated
-`; - html += `
-`; - } - html += `
-`; - html += `
\n\n`; - - html += renderTelemetryInsights(telemetry); - - // Provider breakdown - const providers = Object.keys(stats.providerBreakdown); - if (providers.length > 0) { - html += `
-`; - html += `

Translation Providers Used

-`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - - providers.forEach(provider => { - const providerStats = stats.providerBreakdown[provider]; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - }); - - html += ` -`; - html += `
ProviderChaptersCostTime
${escapeXml(provider)}${providerStats.chapters}$${providerStats.cost.toFixed(4)}${Math.round(providerStats.time)}s
-`; - html += `
\n\n`; - } - - // Model breakdown (top 10 most used) - const models = Object.entries(stats.modelBreakdown) - .sort(([,a], [,b]) => b.chapters - a.chapters) - .slice(0, 10); - - if (models.length > 0) { - html += `
-`; - html += `

AI Models Used

-`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - - models.forEach(([model, modelStats]) => { - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - }); - - html += ` -`; - html += `
ModelChaptersTokens
${escapeXml(model)}${modelStats.chapters}${modelStats.tokens.toLocaleString()}
-`; - html += `
\n\n`; - } - - // Gratitude message - html += `
-`; - html += `

Acknowledgments

-`; - html += `

${escapeXml(template.gratitudeMessage || '')}

-`; - if (template.additionalAcknowledgments) { - html += `

${escapeXml(template.additionalAcknowledgments)}

-`; - } - html += `
\n\n`; - - // Footer - if (template.customFooter) { - html += `
-`; - html += `${escapeXml(template.customFooter)} -`; - html += `
-`; - } - - html += `
-`; - html += `

Translation completed on ${new Date().toLocaleDateString()}

-`; - html += `
-`; - - return html; -}; - -/** - * Converts chapter content with illustrations and footnotes to XHTML suitable for EPUB - */ -const convertChapterToHtml = (chapter: ChapterForEpub): string => { - let htmlContent = chapter.translatedTitle ? - `

${escapeXml(chapter.translatedTitle)}

\n\n` : - `

${escapeXml(chapter.title)}

\n\n`; - - // Get the translated content, fallback to original if needed - let content = chapter.content; - - // Process content and embed images - if (chapter.images.length > 0) { - // Replace illustration markers with actual images - for (const image of chapter.images) { - const imgHtml = `
- \"${escapeXml(image.prompt)}\" -

${escapeXml(image.prompt)}

-
`; - - content = content.replace(image.marker, imgHtml); - } - } - - // Process and embed footnotes - if (chapter.footnotes && chapter.footnotes.length > 0) { - // Replace footnote markers with links - for (const footnote of chapter.footnotes) { - const footnoteLink = `[${footnote.marker}]`; - content = content.replace(`[${footnote.marker}]`, footnoteLink); - } - - // Add footnotes section at the end - let footnotesHtml = '
\n

Footnotes

\n
    \n'; - for (const footnote of chapter.footnotes) { - footnotesHtml += `
  1. -`; - footnotesHtml += ` ${escapeXml(footnote.text)} -`; - footnotesHtml += ` -`; - footnotesHtml += `
  2. \n`; - } - footnotesHtml += '
\n
\n'; - content += '\n' + footnotesHtml; - } - - // Convert content to proper XHTML paragraphs - content = convertToXhtmlParagraphs(content); - - htmlContent += content; - - return htmlContent; -}; - -/** - * Converts text content to proper XHTML paragraphs without invalid nesting - */ -const convertToXhtmlParagraphs = (content: string): string => { - // First, escape any remaining unescaped XML entities - content = content.replace(/&(?!(amp|lt|gt|quot|apos);)/g, '&'); - - // Split content by double newlines to create paragraphs - const paragraphs = content.split(/\n\s*\n/); - - let xhtmlContent = ''; - - for (let para of paragraphs) { - para = para.trim(); - if (!para) continue; - - // Check if this paragraph already contains block-level HTML elements - const hasBlockElements = /<(div|p|h[1-6]|ul|ol|li|blockquote|pre|hr|table|form|fieldset|address|center)[^>]*>/i.test(para); - - if (hasBlockElements) { - // Already has block elements, just add it as-is but fix line breaks - para = para.replace(/\n/g, ' '); // Convert single line breaks to spaces within block elements - xhtmlContent += para + '\n\n'; - } else { - // Regular text paragraph - wrap in

and convert line breaks to
- para = para.replace(/\n/g, '
'); // Use self-closing br tags for XHTML - xhtmlContent += `

${para}

\n\n`; - } - } - - return xhtmlContent.trim(); -}; - -/** - * Build chapter XHTML using DOM nodes (footnotes visible inline and at end) - */ -const buildChapterXhtml = (chapter: ChapterForEpub): string => { - const root = document.createElement('div'); - // Title - const h1 = document.createElement('h1'); - h1.textContent = chapter.translatedTitle || chapter.title; - root.appendChild(h1); - - // 1) Inject placeholders for markers - const withIllu = chapter.content.replace(/\[(ILLUSTRATION-\d+[A-Za-z]*) \]/g, (_m, marker) => { - return ``; - }); - const withPlaceholders = withIllu.replace(/\[(\d+)\]/g, (_m, n) => ``); - - // 2) Sanitize with tight allowlist to preserve inline tags safely - const sanitized = sanitizeHtmlAllowlist(withPlaceholders); - - // 3) Materialize into a working container and normalize newlines to
- const container = document.createElement('div'); - container.innerHTML = sanitized; - convertNewlinesToBrInElement(container); - - // 4) Replace placeholders with generated illustration blocks and footnote refs - const imagesByMarker = new Map( - chapter.images.map(i => [i.marker, i]) - ); - for (const span of Array.from(container.querySelectorAll('span[data-illu]'))) { - const marker = (span as HTMLElement).getAttribute('data-illu') || ''; - const img = imagesByMarker.get(`[${marker}]`) || imagesByMarker.get(marker); - if (img) { - const wrap = document.createElement('div'); - wrap.setAttribute('class', 'illustration'); - const im = document.createElement('img'); - im.setAttribute('src', img.imageData); - im.setAttribute('alt', img.prompt); - im.setAttribute('style', 'max-width: 100%; height: auto; display: block; margin: 1em auto;'); - const cap = document.createElement('p'); - cap.setAttribute('class', 'illustration-caption'); - cap.setAttribute('style', 'text-align: center; font-style: italic; color: #666; font-size: 0.9em; margin-top: 0.5em;'); - cap.textContent = img.prompt; - wrap.appendChild(im); - wrap.appendChild(cap); - span.replaceWith(wrap); - } else { - // If missing, remove placeholder - span.remove(); - } - } - for (const span of Array.from(container.querySelectorAll('span[data-fn]'))) { - const num = (span as HTMLElement).getAttribute('data-fn') || ''; - const sup = document.createElement('sup'); - const a = document.createElement('a'); - a.setAttribute('href', `#fn${num}`); - a.setAttribute('class', 'footnote-ref'); - a.setAttribute('id', `fnref${num}`); - a.setAttribute('epub:type', 'noteref'); - a.textContent = `[${num}]`; - sup.appendChild(a); - span.replaceWith(sup); - } - - // 5) Append sanitized content under title - while (container.firstChild) root.appendChild(container.firstChild); - - // 6) Footnotes section at end - if (chapter.footnotes && chapter.footnotes.length > 0) { - const div = document.createElement('div'); - div.setAttribute('class', 'footnotes'); - const h3 = document.createElement('h3'); - h3.textContent = 'Footnotes'; - const ol = document.createElement('ol'); - div.appendChild(h3); - div.appendChild(ol); - for (const fn of chapter.footnotes) { - const num = String(fn.marker).replace(/^\ \[|\]$/g, ''); - const li = document.createElement('li'); - li.setAttribute('id', `fn${num}`); - li.setAttribute('epub:type', 'footnote'); - - // Allow limited inline HTML inside footnotes (e.g., , ,
) - try { - const safeHtml = sanitizeHtmlAllowlist(fn.text || ''); - if (safeHtml) { - const temp = document.createElement('div'); - temp.innerHTML = safeHtml; - while (temp.firstChild) li.appendChild(temp.firstChild); - li.appendChild(document.createTextNode(' ')); - } else { - li.appendChild(document.createTextNode((fn.text || '') + ' ')); - } - } catch { - li.appendChild(document.createTextNode((fn.text || '') + ' ')); - } - - const back = document.createElement('a'); - back.setAttribute('href', `#fnref${num}`); - back.setAttribute('class', 'footnote-backref'); - back.setAttribute('epub:type', 'backlink'); - back.textContent = '↩'; - li.appendChild(back); - ol.appendChild(li); - } - root.appendChild(div); - } - - // 7) XHTML serialization - return htmlFragmentToXhtml(toStrictXhtml(root.innerHTML)); -}; -/** - * Escape HTML characters to prevent XSS and formatting issues (kept for backward compatibility) - */ -const escapeHtml = (text: string): string => { - const div = document.createElement('div'); - div.textContent = text; - return div.innerHTML; -}; +import { AppSettings, SessionChapterData } from '../types'; +import { + ChapterForEpub, + EpubExportOptions, + TranslationStats, + TelemetryInsights, + NovelConfig, + EpubTemplate, + EpubChapter, + EpubMeta +} from './epub/types'; +import { + getDefaultTemplate, + createCustomTemplate +} from './epub/templates/defaults'; +import { getNovelConfig } from './epub/templates/novelConfig'; +import { calculateTranslationStats } from './epub/data/stats'; +import { collectActiveVersions } from './epub/data/collector'; +import { generateTitlePage } from './epub/generators/titlePage'; +import { generateTableOfContents } from './epub/generators/toc'; +import { + renderTelemetryInsights, + generateStatsAndAcknowledgments +} from './epub/generators/statsPage'; +import { + buildChapterXhtml, + htmlFragmentToXhtml +} from './epub/generators/chapter'; +import { generateEpub3WithJSZip } from './epub/packagers/epubPackager'; + +// Re-export types for consumers +export type { + ChapterForEpub, + TranslationStats, + TelemetryInsights, + NovelConfig, + EpubTemplate, + EpubExportOptions, + EpubChapter, + EpubMeta +}; + +export { + getDefaultTemplate, + createCustomTemplate, + getNovelConfig, + calculateTranslationStats, + collectActiveVersions +}; + +// Re-export specific generators if needed by tests +export { renderTelemetryInsights }; /** * Generates and downloads an EPUB file from the collected chapters using JSZip (browser-compatible) @@ -1313,6 +88,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => const includeStats = (options as any).includeStatsPage !== false; const tableOfContents = generateTableOfContents(options.chapters, includeStats); const statsAndAcknowledgments = generateStatsAndAcknowledgments(stats, template, options.telemetryInsights); + // Ensure special pages are XHTML-safe const titlePageXhtml = htmlFragmentToXhtml(titlePage); const tocXhtml = htmlFragmentToXhtml(tableOfContents); @@ -1324,6 +100,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => chapters.push({ id: 'title-page', title: 'Title Page', xhtml: titlePageXhtml, href: 'title.xhtml' }); } chapters.push({ id: 'toc-page', title: 'Table of Contents', xhtml: tocXhtml, href: 'toc.xhtml' }); + options.chapters.forEach((chapter, index) => { chapters.push({ id: `ch-${String(index + 1).padStart(3, '0')}`, @@ -1332,6 +109,7 @@ export const generateEpub = async (options: EpubExportOptions): Promise => href: `chapter-${String(index + 1).padStart(4, '0')}.xhtml` }); }); + if (includeStats) { chapters.push({ id: 'stats-page', title: 'Acknowledgments', xhtml: statsXhtml, href: 'stats.xhtml' }); } @@ -1390,388 +168,4 @@ export const generateEpub = async (options: EpubExportOptions): Promise => console.error('[EPUBService] Failed to generate EPUB:', error); throw new Error(`EPUB generation failed: ${error}`); } -}; - -// JSZip-based EPUB3 generation types and functions -export interface EpubChapter { - id: string; - title: string; - xhtml: string; - href: string; -} - -export interface EpubMeta { - title: string; - author: string; - description?: string; - language?: string; - identifier?: string; - publisher?: string; -} - -/** - * Generates EPUB3-compliant ZIP file using JSZip (browser-compatible) - */ -const generateEpub3WithJSZip = async (meta: EpubMeta, chapters: EpubChapter[]): Promise => { - const lang = meta.language || 'en'; - const bookId = meta.identifier || `urn:uuid:${crypto.randomUUID()}`; - - // EPUB3 directory structure - const oebps = 'OEBPS'; - const textDir = `${oebps}/text`; - const stylesDir = `${oebps}/styles`; - const imagesDir = `${oebps}/images`; - - // Helper to wrap content in XHTML - const xhtmlWrap = (title: string, body: string) => ` - - - - - ${escapeXml(title)} - - - - ${body} - -`; - - // Generate navigation document (EPUB3 requirement) - const navXhtml = ` - - - - - Table of Contents - - - - - -`; - - // Generate manifest items for content.opf - const manifestItems = chapters.map(ch => - `` - ).join('\n '); - - // Generate spine items for content.opf - const spineItems = chapters.map(ch => - `` - ).join('\n '); - - // Content.opf (package document) - const contentOpf = ` - - - ${escapeXml(bookId)} - ${escapeXml(meta.title)} - ${lang} - ${meta.author ? `${escapeXml(meta.author)}` : ''} - ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} - ${meta.description ? `${escapeXml(meta.description)}` : ''} - ${new Date().toISOString()} - - - - - ${manifestItems} - - - ${spineItems} - -`; - - // Container.xml (required EPUB metadata) - const containerXml = ` - - - - -`; - - // Professional CSS styling (preserved from original) - const stylesheet = ` -body { - font-family: Georgia, serif; - line-height: 1.6; - max-width: 42em; - margin: 0 auto; - padding: 1.5em; - color: #333; -} -h1 { - color: #2c3e50; - border-bottom: 2px solid #3498db; - padding-bottom: 0.5em; - margin-bottom: 1em; - font-weight: bold; -} -h2 { - color: #27ae60; - border-bottom: 1px solid #27ae60; - padding-bottom: 0.3em; - margin-top: 2em; - margin-bottom: 1em; -} -h3 { - color: #8e44ad; - margin-top: 1.5em; - margin-bottom: 0.75em; -} -p { - margin: 1em 0; - text-align: justify; - text-indent: 1.5em; -} -.illustration { - page-break-inside: avoid; - margin: 2em 0; - text-align: center; -} -.illustration img { - max-width: 100%; - height: auto; - border: 1px solid #ddd; - border-radius: 4px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); -} -.illustration-caption { - font-style: italic; - color: #666; - text-align: center; - font-size: 0.9em; - margin-top: 0.5em; - text-indent: 0; -} -table { - width: 100%; - border-collapse: collapse; - margin: 1em 0; - font-size: 0.9em; -} -th, td { - border: 1px solid #ddd; - padding: 0.75em; - text-align: left; -} -th { - background-color: #f8f9fa; - font-weight: bold; -} -ol, ul { - margin: 1em 0; - padding-left: 2em; -} -li { - margin-bottom: 0.5em; - line-height: 1.5; -} -.gratitude-section { - background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); - color: white; - padding: 2em; - border-radius: 12px; - margin: 3em 0; -} -.gratitude-section h2 { - color: white; - border-bottom: 1px solid rgba(255,255,255,0.3); - text-align: center; -} -.gratitude-section p { - text-indent: 0; -} -/* Footnotes styling */ -.footnotes { - margin-top: 3em; - padding-top: 2em; - border-top: 1px solid #ddd; -} -.footnotes h3 { - color: #666; - font-size: 1.1em; - margin-bottom: 1em; -} -.footnotes ol { - font-size: 0.9em; - line-height: 1.4; -} -.footnotes li { - margin-bottom: 0.75em; -} -.footnote-ref { - font-size: 0.8em; - vertical-align: super; - text-decoration: none; - color: #007bff; - font-weight: bold; -} -.footnote-backref { - margin-left: 0.5em; - font-size: 0.8em; - text-decoration: none; - color: #007bff; -} -.footnote-ref:hover, .footnote-backref:hover { - text-decoration: underline; -} -/* Title page specific styling */ -.title-page { - text-align: center; - padding: 4em 2em; - page-break-after: always; -} -.title-page h1 { - font-size: 3em; - margin-bottom: 0.5em; - color: #2c3e50; - border: none; - padding: 0; -} -.title-page .subtitle { - font-size: 1.5em; - color: #7f8c8d; - font-style: italic; - margin-bottom: 2em; -} -.title-page .author { - font-size: 1.25em; - color: #34495e; - margin-bottom: 1em; -} -.title-page .metadata { - margin-top: 3em; - font-size: 0.9em; - color: #666; - line-height: 1.6; -} -.title-page .metadata p { - text-indent: 0; - margin: 0.5em 0; -}`; - - // Extract data:image payloads from chapter XHTML and rewrite to packaged image files - type ImgEntry = { href: string; mediaType: string; base64: string; id: string }; - const processedChapters: { ch: EpubChapter; xhtml: string }[] = []; - const imageEntries: ImgEntry[] = []; - let imgIndex = 1; - const dataImgRegex = /(]*?src=")(data:(image\/[A-Za-z0-9.+-]+);base64,([A-Za-z0-9+/=]+))(\"[^>]*>)/g; - - for (const ch of chapters) { - let xhtml = ch.xhtml; - xhtml = xhtml.replace(dataImgRegex, (_m, p1, _src, mime, b64, p5) => { - const ext = mime.endsWith('jpeg') ? 'jpg' : (mime.split('/')[1] || 'png'); - const filename = `img-${String(imgIndex).padStart(4, '0')}.${ext}`; - const href = `images/${filename}`; - const id = `img${imgIndex}`; - imageEntries.push({ href, mediaType: mime, base64: b64, id }); - imgIndex++; - return `${p1}../${href}${p5}`; - }); - processedChapters.push({ ch, xhtml }); - } - - // Build manifest and spine including images - const manifestItemsText = processedChapters.map(({ ch }) => - `` - ).join('\n '); - const manifestItemsImages = imageEntries.map(img => - `` - ).join('\n '); - const spineItems2 = processedChapters.map(({ ch }) => ``).join('\n '); - - const contentOpf2 = ` - - - ${escapeXml(bookId)} - ${escapeXml(meta.title)} - ${lang} - ${meta.author ? `${escapeXml(meta.author)}` : ''} - ${meta.publisher ? `${escapeXml(meta.publisher)}` : ''} - ${meta.description ? `${escapeXml(meta.description)}` : ''} - ${new Date().toISOString()} - - - - - ${manifestItemsText} - ${manifestItemsImages ? `\n ${manifestItemsImages}` : ''} - - - ${spineItems2} - -`; - - // Create ZIP with JSZip - const zip = new JSZip(); - - // Add mimetype (must be first and uncompressed) - zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' }); - - // Add META-INF - zip.file('META-INF/container.xml', containerXml); - - // Add OEBPS content - zip.file(`${oebps}/content.opf`, contentOpf2); - zip.file(`${textDir}/nav.xhtml`, navXhtml); - zip.file(`${stylesDir}/stylesheet.css`, stylesheet); - - // Add processed chapter files and extracted images (with optional strict XML parse diagnostics) - const parseErrors: string[] = []; - for (const { ch, xhtml } of processedChapters) { - const wrapped = xhtmlWrap(ch.title, xhtml); - try { - const parser = new DOMParser(); - const doc = parser.parseFromString(wrapped, 'application/xhtml+xml'); - const hasError = - doc.getElementsByTagName('parsererror').length > 0 || - doc.getElementsByTagNameNS('*', 'parsererror').length > 0; - if (hasError) { - const txt = doc.documentElement.textContent || ''; - const msg = `[ParseError] ${ch.href}: ${txt.slice(0, 300)}`; - console.warn(msg); - parseErrors.push(msg); - } - } catch {} - zip.file(`${textDir}/${ch.href}`, wrapped); - } - for (const img of imageEntries) { - zip.file(`${oebps}/${img.href}`, img.base64, { base64: true }); - } - - // Attach diagnostics when parse errors are detected - if (parseErrors.length > 0) { - zip.file(`${oebps}/debug/parse-errors.txt`, parseErrors.join('\n')); - processedChapters.forEach(({ ch, xhtml }) => { - zip.file(`${oebps}/debug/text/${ch.href}.raw.xhtml`, xhtml); - }); - } - - // Generate and return ArrayBuffer - return await zip.generateAsync({ - type: 'arraybuffer', - mimeType: 'application/epub+zip' - }); -}; - -/** - * Escapes XML characters to prevent formatting issues - */ -const escapeXml = (text: string): string => { - return text - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); -}; - -// toStrictXhtml is imported from services/translate/HtmlSanitizer - -// sanitizeHtml function removed - unused in epubService +}; \ No newline at end of file From 8d62b0513ae6e7a1de0db032cec71cf13fdd6b42 Mon Sep 17 00:00:00 2001 From: Aditya A P Date: Mon, 22 Dec 2025 10:14:28 +0530 Subject: [PATCH 2/3] refactor(epub): isolate epubService decomposition modules --- services/epub/types.ts | 334 +++++++++++++----- services/epubService.ts | 25 +- .../{epub => epubService}/data/collector.ts | 0 services/{epub => epubService}/data/stats.ts | 0 .../generators/chapter.ts | 0 .../generators/statsPage.ts | 97 +---- .../generators/telemetryInsights.ts | 98 +++++ .../generators/titlePage.ts | 0 .../{epub => epubService}/generators/toc.ts | 0 .../packagers/epubPackager.ts | 161 +-------- services/epubService/packagers/stylesheet.ts | 160 +++++++++ .../sanitizers/xhtmlSanitizer.ts | Bin .../templates/defaults.ts | 0 .../templates/novelConfig.ts | 0 services/epubService/types.ts | 116 ++++++ 15 files changed, 636 insertions(+), 355 deletions(-) rename services/{epub => epubService}/data/collector.ts (100%) rename services/{epub => epubService}/data/stats.ts (100%) rename services/{epub => epubService}/generators/chapter.ts (100%) rename services/{epub => epubService}/generators/statsPage.ts (68%) create mode 100644 services/epubService/generators/telemetryInsights.ts rename services/{epub => epubService}/generators/titlePage.ts (100%) rename services/{epub => epubService}/generators/toc.ts (100%) rename services/{epub => epubService}/packagers/epubPackager.ts (73%) create mode 100644 services/epubService/packagers/stylesheet.ts rename services/{epub => epubService}/sanitizers/xhtmlSanitizer.ts (100%) rename services/{epub => epubService}/templates/defaults.ts (100%) rename services/{epub => epubService}/templates/novelConfig.ts (100%) create mode 100644 services/epubService/types.ts diff --git a/services/epub/types.ts b/services/epub/types.ts index 6553092..f11ceb1 100644 --- a/services/epub/types.ts +++ b/services/epub/types.ts @@ -1,116 +1,272 @@ -import { AppSettings } from '../../types'; +/** + * EPUB Export Pipeline Types + * + * Defines the contracts between each module in the export pipeline: + * 1. Data Collector → Collected Chapters + * 2. Asset Resolver → Resolved Assets + * 3. Content Builder → HTML/Manifest + * 4. Package Builder → Final EPUB Blob + * 5. Export Service → Orchestration + */ -export interface ChapterForEpub { +import type { AppSettings, ImageCacheKey } from '../../types'; + +// ============================================================================ +// EXPORT OPTIONS +// ============================================================================ + +export interface EpubExportOptions { + /** Chapter ordering: by number or by navigation links */ + order: 'number' | 'navigation'; + + /** Include title page */ + includeTitlePage: boolean; + + /** Include statistics page at end */ + includeStatsPage: boolean; + + /** Enable HTML repair on translated content prior to export */ + enableHtmlRepair?: boolean; + + /** Optional list of chapter URLs to include (legacy pipeline support) */ + chapterUrls?: string[]; + + /** Optional manual configuration overrides (legacy pipeline support) */ + manualConfig?: unknown; + + /** Optional custom template identifier (legacy pipeline support) */ + customTemplate?: unknown; + + /** Custom EPUB metadata overrides */ + metadata?: { + gratitudeMessage?: string; + projectDescription?: string; + footer?: string | null; + }; + + /** Settings snapshot for statistics */ + settings: AppSettings; +} + +// ============================================================================ +// DATA COLLECTOR OUTPUT +// ============================================================================ + +export interface CollectedChapter { + /** Stable chapter ID */ + id: string; + + /** Chapter number (for ordering) */ + chapterNumber?: number; + + /** Original title */ title: string; - originalTitle?: string; + + /** Original content (HTML) */ content: string; - originalUrl: string; - url?: string; - translatedTitle: string; + + /** Translated title */ + translatedTitle?: string; + + /** Translated content (HTML) */ translatedContent?: string; - prevUrl?: string | null; - nextUrl?: string | null; - usageMetrics: { - totalTokens: number; - promptTokens: number; - completionTokens: number; - estimatedCost: number; - requestTime: number; + + /** Footnotes */ + footnotes: Array<{ marker: string; text: string }>; + + /** Image references (not yet resolved) */ + imageReferences: Array<{ + placementMarker: string; + prompt: string; + cacheKey?: ImageCacheKey; + base64Fallback?: string; // Legacy data + }>; + + /** Translation metadata for statistics */ + translationMeta?: { provider: string; model: string; + cost: number; + tokens: number; + requestTime: number; }; - images: Array<{ + + /** Navigation URLs */ + prevUrl?: string | null; + nextUrl?: string | null; +} + +export interface CollectedData { + chapters: CollectedChapter[]; + + /** Session metadata */ + metadata: { + novelTitle?: string; + totalChapters: number; + translatedChapters: number; + exportDate: string; + }; + + /** Warnings from collection phase */ + warnings: Array<{ + type: 'missing-translation' | 'missing-content' | 'ordering-gap'; + chapterId: string; + message: string; + }>; +} + +// ============================================================================ +// ASSET RESOLVER OUTPUT +// ============================================================================ + +export interface ResolvedAsset { + /** Internal asset ID (e.g., "img-ch1-ILLUSTRATION-1") */ + id: string; + + /** MIME type (e.g., "image/png", "audio/mpeg") */ + mimeType: string; + + /** Binary data */ + data: ArrayBuffer; + + /** File extension for manifest (e.g., "png", "mp3") */ + extension: string; + + /** Original source reference */ + sourceRef: { + chapterId: string; marker: string; - imageData: string; // base64 data URL + type: 'image' | 'audio'; + }; +} + +export interface ResolvedChapter extends CollectedChapter { + /** Image references now have resolved asset IDs */ + imageReferences: Array<{ + placementMarker: string; prompt: string; + assetId?: string; // Set by resolver if asset found + missing?: boolean; // True if cache miss }>; - footnotes?: Array<{ +} + +export interface ResolvedAssets { + chapters: ResolvedChapter[]; + assets: ResolvedAsset[]; + + /** Asset resolution warnings */ + warnings: Array<{ + type: 'cache-miss' | 'invalid-data' | 'conversion-failed'; + assetId: string; + chapterId: string; marker: string; - text: string; + message: string; }>; } -export interface TranslationStats { - totalCost: number; - totalTime: number; - totalTokens: number; - chapterCount: number; - imageCount: number; - providerBreakdown: Record; - modelBreakdown: Record; +// ============================================================================ +// CONTENT BUILDER OUTPUT +// ============================================================================ + +export interface EpubManifestItem { + id: string; + href: string; + mediaType: string; + properties?: string; } -export interface TelemetryInsights { - totalEvents: number; - sessionDurationMs: number; - navigation: { count: number; totalMs: number; averageMs: number }; - hydration: { count: number; totalMs: number; averageMs: number }; - chapterReady: { count: number; totalMs: number; averageMs: number }; - exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; +export interface EpubSpineItem { + idref: string; + linear?: 'yes' | 'no'; } -export interface NovelConfig { +export interface EpubNavItem { title: string; - author: string; - originalTitle?: string; - description?: string; - genre?: string; - language: string; - originalLanguage?: string; - coverImage?: string; // base64 or URL - seriesName?: string; - volumeNumber?: number; - isbn?: string; - publisher?: string; - translationNotes?: string; + href: string; + children?: EpubNavItem[]; } -export interface EpubTemplate { - gratitudeMessage?: string; - projectDescription?: string; - githubUrl?: string; - additionalAcknowledgments?: string; - customFooter?: string; +export interface BuiltContent { + /** Per-chapter XHTML files */ + chapterFiles: Array<{ + filename: string; // e.g., "chapter-001.xhtml" + content: string; // XHTML string + chapterId: string; + }>; + + /** Optional title page XHTML */ + titlePage?: { + filename: string; + content: string; + }; + + /** Optional statistics page XHTML */ + statsPage?: { + filename: string; + content: string; + }; + + /** OPF manifest items (for content.opf) */ + manifestItems: EpubManifestItem[]; + + /** OPF spine items (reading order) */ + spineItems: EpubSpineItem[]; + + /** Navigation document structure */ + navigation: EpubNavItem[]; + + /** Package metadata */ + packageMeta: { + title: string; + language: string; + identifier: string; + date: string; + }; } -export interface EpubExportOptions { - title?: string; - author?: string; - description?: string; - chapters: ChapterForEpub[]; - settings: AppSettings; - template?: EpubTemplate; - novelConfig?: NovelConfig; - telemetryInsights?: TelemetryInsights; - includeTitlePage?: boolean; - includeStatsPage?: boolean; - customTemplate?: any; - manualConfig?: any; - chapterUrls?: string[]; +// ============================================================================ +// PACKAGE BUILDER OUTPUT +// ============================================================================ + +export interface EpubPackage { + /** Final EPUB as blob */ + blob: Blob; + + /** File size in bytes */ + sizeBytes: number; + + /** Package validation result */ + validation: { + valid: boolean; + errors: string[]; + warnings: string[]; + }; } -export interface EpubChapter { - id: string; - title: string; - xhtml: string; - href: string; +// ============================================================================ +// EXPORT SERVICE (ORCHESTRATION) +// ============================================================================ + +export interface ExportProgress { + phase: 'collecting' | 'resolving' | 'building' | 'packaging' | 'complete' | 'error'; + percent: number; // 0-100 + message: string; + detail?: string; } -export interface EpubMeta { - title: string; - author: string; - description?: string; - language?: string; - identifier?: string; - publisher?: string; -} \ No newline at end of file +export type ProgressCallback = (progress: ExportProgress) => void; + +export interface ExportResult { + success: boolean; + blob?: Blob; + error?: string; + + /** Summary statistics */ + stats: { + totalChapters: number; + assetsResolved: number; + assetsMissing: number; + warnings: number; + durationMs: number; + }; +} diff --git a/services/epubService.ts b/services/epubService.ts index 9984356..3a20312 100644 --- a/services/epubService.ts +++ b/services/epubService.ts @@ -1,4 +1,3 @@ -import { AppSettings, SessionChapterData } from '../types'; import { ChapterForEpub, EpubExportOptions, @@ -8,25 +7,25 @@ import { EpubTemplate, EpubChapter, EpubMeta -} from './epub/types'; +} from './epubService/types'; import { getDefaultTemplate, createCustomTemplate -} from './epub/templates/defaults'; -import { getNovelConfig } from './epub/templates/novelConfig'; -import { calculateTranslationStats } from './epub/data/stats'; -import { collectActiveVersions } from './epub/data/collector'; -import { generateTitlePage } from './epub/generators/titlePage'; -import { generateTableOfContents } from './epub/generators/toc'; +} from './epubService/templates/defaults'; +import { getNovelConfig } from './epubService/templates/novelConfig'; +import { calculateTranslationStats } from './epubService/data/stats'; +import { collectActiveVersions } from './epubService/data/collector'; +import { generateTitlePage } from './epubService/generators/titlePage'; +import { generateTableOfContents } from './epubService/generators/toc'; import { renderTelemetryInsights, generateStatsAndAcknowledgments -} from './epub/generators/statsPage'; +} from './epubService/generators/statsPage'; import { buildChapterXhtml, - htmlFragmentToXhtml -} from './epub/generators/chapter'; -import { generateEpub3WithJSZip } from './epub/packagers/epubPackager'; +} from './epubService/generators/chapter'; +import { htmlFragmentToXhtml } from './epubService/sanitizers/xhtmlSanitizer'; +import { generateEpub3WithJSZip } from './epubService/packagers/epubPackager'; // Re-export types for consumers export type { @@ -168,4 +167,4 @@ export const generateEpub = async (options: EpubExportOptions): Promise => console.error('[EPUBService] Failed to generate EPUB:', error); throw new Error(`EPUB generation failed: ${error}`); } -}; \ No newline at end of file +}; diff --git a/services/epub/data/collector.ts b/services/epubService/data/collector.ts similarity index 100% rename from services/epub/data/collector.ts rename to services/epubService/data/collector.ts diff --git a/services/epub/data/stats.ts b/services/epubService/data/stats.ts similarity index 100% rename from services/epub/data/stats.ts rename to services/epubService/data/stats.ts diff --git a/services/epub/generators/chapter.ts b/services/epubService/generators/chapter.ts similarity index 100% rename from services/epub/generators/chapter.ts rename to services/epubService/generators/chapter.ts diff --git a/services/epub/generators/statsPage.ts b/services/epubService/generators/statsPage.ts similarity index 68% rename from services/epub/generators/statsPage.ts rename to services/epubService/generators/statsPage.ts index 548cefd..2606789 100644 --- a/services/epub/generators/statsPage.ts +++ b/services/epubService/generators/statsPage.ts @@ -1,103 +1,12 @@ import { TranslationStats, EpubTemplate, TelemetryInsights } from '../types'; import { escapeXml } from '../sanitizers/xhtmlSanitizer'; +import { renderTelemetryInsights } from './telemetryInsights'; + +export { renderTelemetryInsights } from './telemetryInsights'; /** * Generates a detailed statistics and acknowledgments page */ -export const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { - if (!telemetry) return ''; - - const formatMs = (ms: number): string => { - if (!Number.isFinite(ms)) return '—'; - if (ms < 1000) return `${ms.toFixed(0)} ms`; - const seconds = ms / 1000; - if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; - const minutes = seconds / 60; - if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; - const hours = minutes / 60; - return `${hours.toFixed(2)} h`; - }; - - const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { - if (!data || data.count === 0) return ''; - return ` - - ${label} - ${data.count} - ${formatMs(data.totalMs)} - ${formatMs(data.averageMs)} - `; - }; - - const rows = [ - renderRow('Navigation requests', telemetry.navigation), - renderRow('IndexedDB hydration', telemetry.hydration), - renderRow('Chapter ready-to-read', telemetry.chapterReady), - renderRow('JSON exports', telemetry.exports?.json), - renderRow('EPUB exports', telemetry.exports?.epub) - ].filter(Boolean).join(''); - - let html = `
-`; - html += `

Session Insights

-`; - html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

-`; - html += `
-`; - html += `
-`; - html += `
${telemetry.totalEvents.toLocaleString()}
-`; - html += `
Telemetry Events
-`; - html += `
-`; - html += `
-`; - html += `
${formatMs(telemetry.sessionDurationMs)}
-`; - html += `
Session Duration
-`; - html += `
-`; - html += `
-`; - - if (rows) { - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += ` -`; - html += rows; - html += ` -`; - html += `
ActivityOccurrencesTotal DurationAverage Duration
-`; - } - - html += `
- -`; - return html; -}; - export const generateStatsAndAcknowledgments = (stats: TranslationStats, template: EpubTemplate, telemetry?: TelemetryInsights): string => { let html = `

Acknowledgments

\n\n`; diff --git a/services/epubService/generators/telemetryInsights.ts b/services/epubService/generators/telemetryInsights.ts new file mode 100644 index 0000000..d503f79 --- /dev/null +++ b/services/epubService/generators/telemetryInsights.ts @@ -0,0 +1,98 @@ +import type { TelemetryInsights } from '../types'; + +export const renderTelemetryInsights = (telemetry?: TelemetryInsights): string => { + if (!telemetry) return ''; + + const formatMs = (ms: number): string => { + if (!Number.isFinite(ms)) return '—'; + if (ms < 1000) return `${ms.toFixed(0)} ms`; + const seconds = ms / 1000; + if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 1 : 2)} s`; + const minutes = seconds / 60; + if (minutes < 60) return `${minutes.toFixed(minutes >= 10 ? 1 : 2)} min`; + const hours = minutes / 60; + return `${hours.toFixed(2)} h`; + }; + + const renderRow = (label: string, data?: { count: number; totalMs: number; averageMs: number }) => { + if (!data || data.count === 0) return ''; + return ` + + ${label} + ${data.count} + ${formatMs(data.totalMs)} + ${formatMs(data.averageMs)} + `; + }; + + const rows = [ + renderRow('Navigation requests', telemetry.navigation), + renderRow('IndexedDB hydration', telemetry.hydration), + renderRow('Chapter ready-to-read', telemetry.chapterReady), + renderRow('JSON exports', telemetry.exports?.json), + renderRow('EPUB exports', telemetry.exports?.epub), + ] + .filter(Boolean) + .join(''); + + let html = `
+`; + html += `

Session Insights

+`; + html += `

Recorded via LexiconForge telemetry during preparation of this EPUB.

+`; + html += `
+`; + html += `
+`; + html += `
${telemetry.totalEvents.toLocaleString()}
+`; + html += `
Telemetry Events
+`; + html += `
+`; + html += `
+`; + html += `
${formatMs(telemetry.sessionDurationMs)}
+`; + html += `
Session Duration
+`; + html += `
+`; + html += `
+`; + + if (rows) { + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += ` +`; + html += rows; + html += ` +`; + html += `
ActivityOccurrencesTotal DurationAverage Duration
+`; + } + + html += `
+ +`; + return html; +}; + diff --git a/services/epub/generators/titlePage.ts b/services/epubService/generators/titlePage.ts similarity index 100% rename from services/epub/generators/titlePage.ts rename to services/epubService/generators/titlePage.ts diff --git a/services/epub/generators/toc.ts b/services/epubService/generators/toc.ts similarity index 100% rename from services/epub/generators/toc.ts rename to services/epubService/generators/toc.ts diff --git a/services/epub/packagers/epubPackager.ts b/services/epubService/packagers/epubPackager.ts similarity index 73% rename from services/epub/packagers/epubPackager.ts rename to services/epubService/packagers/epubPackager.ts index aafe6ec..a855299 100644 --- a/services/epub/packagers/epubPackager.ts +++ b/services/epubService/packagers/epubPackager.ts @@ -1,6 +1,7 @@ import JSZip from 'jszip'; import { EpubMeta, EpubChapter } from '../types'; import { escapeXml } from '../sanitizers/xhtmlSanitizer'; +import { EPUB_STYLESHEET_CSS } from './stylesheet'; /** * Generates EPUB3-compliant ZIP file using JSZip (browser-compatible) @@ -89,165 +90,7 @@ export const generateEpub3WithJSZip = async (meta: EpubMeta, chapters: EpubChapt `; // Professional CSS styling (preserved from original) - const stylesheet = ` -body { - font-family: Georgia, serif; - line-height: 1.6; - max-width: 42em; - margin: 0 auto; - padding: 1.5em; - color: #333; -} -h1 { - color: #2c3e50; - border-bottom: 2px solid #3498db; - padding-bottom: 0.5em; - margin-bottom: 1em; - font-weight: bold; -} -h2 { - color: #27ae60; - border-bottom: 1px solid #27ae60; - padding-bottom: 0.3em; - margin-top: 2em; - margin-bottom: 1em; -} -h3 { - color: #8e44ad; - margin-top: 1.5em; - margin-bottom: 0.75em; -} -p { - margin: 1em 0; - text-align: justify; - text-indent: 1.5em; -} -.illustration { - page-break-inside: avoid; - margin: 2em 0; - text-align: center; -} -.illustration img { - max-width: 100%; - height: auto; - border: 1px solid #ddd; - border-radius: 4px; - box-shadow: 0 2px 4px rgba(0,0,0,0.1); -} -.illustration-caption { - font-style: italic; - color: #666; - text-align: center; - font-size: 0.9em; - margin-top: 0.5em; - text-indent: 0; -} -table { - width: 100%; - border-collapse: collapse; - margin: 1em 0; - font-size: 0.9em; -} -th, td { - border: 1px solid #ddd; - padding: 0.75em; - text-align: left; -} -th { - background-color: #f8f9fa; - font-weight: bold; -} -ol, ul { - margin: 1em 0; - padding-left: 2em; -} -li { - margin-bottom: 0.5em; - line-height: 1.5; -} -.gratitude-section { - background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); - color: white; - padding: 2em; - border-radius: 12px; - margin: 3em 0; -} -.gratitude-section h2 { - color: white; - border-bottom: 1px solid rgba(255,255,255,0.3); - text-align: center; -} -.gratitude-section p { - text-indent: 0; -} -/* Footnotes styling */ -.footnotes { - margin-top: 3em; - padding-top: 2em; - border-top: 1px solid #ddd; -} -.footnotes h3 { - color: #666; - font-size: 1.1em; - margin-bottom: 1em; -} -.footnotes ol { - font-size: 0.9em; - line-height: 1.4; -} -.footnotes li { - margin-bottom: 0.75em; -} -.footnote-ref { - font-size: 0.8em; - vertical-align: super; - text-decoration: none; - color: #007bff; - font-weight: bold; -} -.footnote-backref { - margin-left: 0.5em; - font-size: 0.8em; - text-decoration: none; - color: #007bff; -} -.footnote-ref:hover, .footnote-backref:hover { - text-decoration: underline; -} -/* Title page specific styling */ -.title-page { - text-align: center; - padding: 4em 2em; - page-break-after: always; -} -.title-page h1 { - font-size: 3em; - margin-bottom: 0.5em; - color: #2c3e50; - border: none; - padding: 0; -} -.title-page .subtitle { - font-size: 1.5em; - color: #7f8c8d; - font-style: italic; - margin-bottom: 2em; -} -.title-page .author { - font-size: 1.25em; - color: #34495e; - margin-bottom: 1em; -} -.title-page .metadata { - margin-top: 3em; - font-size: 0.9em; - color: #666; - line-height: 1.6; -} -.title-page .metadata p { - text-indent: 0; - margin: 0.5em 0; -}`; + const stylesheet = EPUB_STYLESHEET_CSS; // Extract data:image payloads from chapter XHTML and rewrite to packaged image files type ImgEntry = { href: string; mediaType: string; base64: string; id: string }; diff --git a/services/epubService/packagers/stylesheet.ts b/services/epubService/packagers/stylesheet.ts new file mode 100644 index 0000000..217024b --- /dev/null +++ b/services/epubService/packagers/stylesheet.ts @@ -0,0 +1,160 @@ +export const EPUB_STYLESHEET_CSS = ` +body { + font-family: Georgia, serif; + line-height: 1.6; + max-width: 42em; + margin: 0 auto; + padding: 1.5em; + color: #333; +} +h1 { + color: #2c3e50; + border-bottom: 2px solid #3498db; + padding-bottom: 0.5em; + margin-bottom: 1em; + font-weight: bold; +} +h2 { + color: #27ae60; + border-bottom: 1px solid #27ae60; + padding-bottom: 0.3em; + margin-top: 2em; + margin-bottom: 1em; +} +h3 { + color: #8e44ad; + margin-top: 1.5em; + margin-bottom: 0.75em; +} +p { + margin: 1em 0; + text-align: justify; + text-indent: 1.5em; +} +.illustration { + page-break-inside: avoid; + margin: 2em 0; + text-align: center; +} +.illustration img { + max-width: 100%; + height: auto; + border: 1px solid #ddd; + border-radius: 4px; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); +} +.illustration-caption { + font-style: italic; + color: #666; + text-align: center; + font-size: 0.9em; + margin-top: 0.5em; + text-indent: 0; +} +table { + width: 100%; + border-collapse: collapse; + margin: 1em 0; + font-size: 0.9em; +} +th, td { + border: 1px solid #ddd; + padding: 0.75em; + text-align: left; +} +th { + background-color: #f8f9fa; + font-weight: bold; +} +ol, ul { + margin: 1em 0; + padding-left: 2em; +} +li { + margin-bottom: 0.5em; + line-height: 1.5; +} +.gratitude-section { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 2em; + border-radius: 12px; + margin: 3em 0; +} +.gratitude-section h2 { + color: white; + border-bottom: 1px solid rgba(255,255,255,0.3); + text-align: center; +} +.gratitude-section p { + text-indent: 0; +} +/* Footnotes styling */ +.footnotes { + margin-top: 3em; + padding-top: 2em; + border-top: 1px solid #ddd; +} +.footnotes h3 { + color: #666; + font-size: 1.1em; + margin-bottom: 1em; +} +.footnotes ol { + font-size: 0.9em; + line-height: 1.4; +} +.footnotes li { + margin-bottom: 0.75em; +} +.footnote-ref { + font-size: 0.8em; + vertical-align: super; + text-decoration: none; + color: #007bff; + font-weight: bold; +} +.footnote-backref { + margin-left: 0.5em; + font-size: 0.8em; + text-decoration: none; + color: #007bff; +} +.footnote-ref:hover, .footnote-backref:hover { + text-decoration: underline; +} +/* Title page specific styling */ +.title-page { + text-align: center; + padding: 4em 2em; + page-break-after: always; +} +.title-page h1 { + font-size: 3em; + margin-bottom: 0.5em; + color: #2c3e50; + border: none; + padding: 0; +} +.title-page .subtitle { + font-size: 1.5em; + color: #7f8c8d; + font-style: italic; + margin-bottom: 2em; +} +.title-page .author { + font-size: 1.25em; + color: #34495e; + margin-bottom: 1em; +} +.title-page .metadata { + margin-top: 3em; + font-size: 0.9em; + color: #666; + line-height: 1.6; +} +.title-page .metadata p { + text-indent: 0; + margin: 0.5em 0; +}`; + diff --git a/services/epub/sanitizers/xhtmlSanitizer.ts b/services/epubService/sanitizers/xhtmlSanitizer.ts similarity index 100% rename from services/epub/sanitizers/xhtmlSanitizer.ts rename to services/epubService/sanitizers/xhtmlSanitizer.ts diff --git a/services/epub/templates/defaults.ts b/services/epubService/templates/defaults.ts similarity index 100% rename from services/epub/templates/defaults.ts rename to services/epubService/templates/defaults.ts diff --git a/services/epub/templates/novelConfig.ts b/services/epubService/templates/novelConfig.ts similarity index 100% rename from services/epub/templates/novelConfig.ts rename to services/epubService/templates/novelConfig.ts diff --git a/services/epubService/types.ts b/services/epubService/types.ts new file mode 100644 index 0000000..6553092 --- /dev/null +++ b/services/epubService/types.ts @@ -0,0 +1,116 @@ +import { AppSettings } from '../../types'; + +export interface ChapterForEpub { + title: string; + originalTitle?: string; + content: string; + originalUrl: string; + url?: string; + translatedTitle: string; + translatedContent?: string; + prevUrl?: string | null; + nextUrl?: string | null; + usageMetrics: { + totalTokens: number; + promptTokens: number; + completionTokens: number; + estimatedCost: number; + requestTime: number; + provider: string; + model: string; + }; + images: Array<{ + marker: string; + imageData: string; // base64 data URL + prompt: string; + }>; + footnotes?: Array<{ + marker: string; + text: string; + }>; +} + +export interface TranslationStats { + totalCost: number; + totalTime: number; + totalTokens: number; + chapterCount: number; + imageCount: number; + providerBreakdown: Record; + modelBreakdown: Record; +} + +export interface TelemetryInsights { + totalEvents: number; + sessionDurationMs: number; + navigation: { count: number; totalMs: number; averageMs: number }; + hydration: { count: number; totalMs: number; averageMs: number }; + chapterReady: { count: number; totalMs: number; averageMs: number }; + exports?: { json?: { count: number; totalMs: number; averageMs: number }; epub?: { count: number; totalMs: number; averageMs: number } }; +} + +export interface NovelConfig { + title: string; + author: string; + originalTitle?: string; + description?: string; + genre?: string; + language: string; + originalLanguage?: string; + coverImage?: string; // base64 or URL + seriesName?: string; + volumeNumber?: number; + isbn?: string; + publisher?: string; + translationNotes?: string; +} + +export interface EpubTemplate { + gratitudeMessage?: string; + projectDescription?: string; + githubUrl?: string; + additionalAcknowledgments?: string; + customFooter?: string; +} + +export interface EpubExportOptions { + title?: string; + author?: string; + description?: string; + chapters: ChapterForEpub[]; + settings: AppSettings; + template?: EpubTemplate; + novelConfig?: NovelConfig; + telemetryInsights?: TelemetryInsights; + includeTitlePage?: boolean; + includeStatsPage?: boolean; + customTemplate?: any; + manualConfig?: any; + chapterUrls?: string[]; +} + +export interface EpubChapter { + id: string; + title: string; + xhtml: string; + href: string; +} + +export interface EpubMeta { + title: string; + author: string; + description?: string; + language?: string; + identifier?: string; + publisher?: string; +} \ No newline at end of file From c0b9d77703e5116070586482d82e81f39c810b82 Mon Sep 17 00:00:00 2001 From: Aditya A P Date: Mon, 22 Dec 2025 10:15:27 +0530 Subject: [PATCH 3/3] docs(worklog): log epubService decomposition isolation --- docs/WORKLOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 813121c..64905d9 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -1155,3 +1155,8 @@ Next: After running with reduced logs, gather traces for 'Chapter not found' and - Why: Make first-time contributors productive quickly and capture actionable decomposition plans for known monoliths. - Details: Add onboarding walkthrough (load → translate flow), refresh CONTRIBUTING references to match current code locations, and add draft decomposition plans for `services/epubService.ts`, `store/slices/imageSlice.ts`, and `services/navigationService.ts`. - Tests: N/A (docs only) + +2025-12-22 04:45 UTC - Refactor: isolate `epubService.ts` decomposition modules +- Files: services/epubService.ts; services/epubService/**; services/epub/types.ts; docs/WORKLOG.md +- Why: Decompose the `services/epubService.ts` monolith without breaking the existing `services/epub/*` pipeline/types; keep new modules <300 LOC. +- Tests: `npx tsc --noEmit`; `npm test -- --run tests/services/epubService.test.ts tests/epub/*.test.ts`