diff --git a/client/src/screens/conductor/support/index.tsx b/client/src/screens/conductor/support/index.tsx index 4b3c578b..f889c876 100644 --- a/client/src/screens/conductor/support/index.tsx +++ b/client/src/screens/conductor/support/index.tsx @@ -22,7 +22,7 @@ const SupportCenter = () => { const july = new Date(now.getFullYear(), 6, 1); const stdOffset = Math.max( january.getTimezoneOffset(), - july.getTimezoneOffset() + july.getTimezoneOffset(), ); const isDST = now.getTimezoneOffset() < stdOffset; @@ -61,7 +61,9 @@ const SupportCenter = () => { if (!disabled) openLink(link); }} className={`flex flex-col h-80 w-96 p-4 mx-auto my-4 lg:m-4 border rounded-xl shadow-md items-center cursor-pointer ${ - disabled ? "opacity-70 !cursor-not-allowed" : "opacity-100 hover:shadow-xl" + disabled + ? "opacity-70 !cursor-not-allowed" + : "opacity-100 hover:shadow-xl" }`} aria-disabled={disabled} > @@ -88,6 +90,12 @@ const SupportCenter = () => { icon="text telephone" link="/support/contact" /> + { + debugError("Background books sync error:", e); + }); + } catch (e) { + debugError(e); + // Only send error if response hasn't been sent yet + if (!res.headersSent) { + return res.status(500).send({ + err: true, + errMsg: conductorErrors.err6, + }); + } + } +} + +/** + * Syncs all books to the search index in batches to avoid memory issues + * and timeouts with large datasets. Runs in the background. + * INTERNAL USE ONLY. + */ +async function syncBooksInBackground() { try { debugServer("Initiating Commons Books search index synchronization..."); const searchService = await SearchService.create(); + + const batchSize = 500; // Process 500 books at a time + let skip = 0; + let hasMore = true; + let totalSynced = 0; /** * Book data for search index should be in format: @@ -2637,7 +2671,7 @@ export async function syncWithSearchIndex( * projectTags: string[] // array of tag titles associated with the Book's Project * } */ - const books = await Book.aggregate([ + const aggregationPipeline = [ { // Add project data to each book (if any) $lookup: { @@ -2700,21 +2734,36 @@ export async function syncWithSearchIndex( project: 0 } } - ]) + ]; - const syncResult = await searchService.addDocuments("books", books); + while (hasMore) { + const books = await Book.aggregate([ + ...aggregationPipeline, + { $skip: skip }, + { $limit: batchSize }, + ]); - return res.send({ - err: false, - msg: "Commons Books search index synchronization completed.", - details: syncResult, - }); - } catch (err) { - debugError(err); - return res.status(500).send({ - err: true, - errMsg: conductorErrors.err6, - }); + if (books.length === 0) { + hasMore = false; + break; + } + + await searchService.addDocuments("books", books); + totalSynced += books.length; + debugServer(`Synced batch of ${books.length} books (${totalSynced} total)...`); + + skip += batchSize; + + // If we got fewer results than batchSize, we're done + if (books.length < batchSize) { + hasMore = false; + } + } + + debugServer(`Commons Books search index sync completed. Total synced: ${totalSynced}`); + } catch (e) { + debugError("Error in syncBooksInBackground:", e); + throw e; } } diff --git a/server/api/projects.js b/server/api/projects.js index a970b9f7..f9a27401 100644 --- a/server/api/projects.js +++ b/server/api/projects.js @@ -3254,11 +3254,45 @@ async function getTrafficAnalyticsData(req, res, func) { } async function syncWithSearchIndex(req, res) { + try { + // Return response immediately to avoid timeout + res.send({ + err: false, + msg: "Projects search index sync initiated. This process will run in the background.", + }); + + // Run the actual sync in the background (don't await) + syncProjectsInBackground().catch((e) => { + debugError("Background projects sync error:", e); + }); + } catch (e) { + debugError(e); + // Only send error if response hasn't been sent yet + if (!res.headersSent) { + return res.status(500).send({ + err: true, + errMsg: conductorErrors.err6, + }); + } + } +} + +/** + * Syncs all projects to the search index in batches to avoid memory issues + * and timeouts with large datasets. Runs in the background. + * INTERNAL USE ONLY. + */ +async function syncProjectsInBackground() { try { debugServer("Initiating Projects search index sync..."); const searchService = await SearchService.create(); + + const batchSize = 500; // Process 500 projects at a time + let skip = 0; + let hasMore = true; + let totalSynced = 0; - const projects = await Project.aggregate([ + const aggregationPipeline = [ { $lookup: { from: "users", @@ -3402,22 +3436,36 @@ async function syncWithSearchIndex(req, res) { instructorAssets: 1, }, }, - ]); + ]; - const syncResult = await searchService.addDocuments("projects", projects); - debugServer("Projects search index sync completed."); + while (hasMore) { + const projects = await Project.aggregate([ + ...aggregationPipeline, + { $skip: skip }, + { $limit: batchSize }, + ]); - return res.send({ - err: false, - msg: "Projects search index sync completed.", - syncResult, - }); + if (projects.length === 0) { + hasMore = false; + break; + } + + await searchService.addDocuments("projects", projects); + totalSynced += projects.length; + debugServer(`Synced batch of ${projects.length} projects (${totalSynced} total)...`); + + skip += batchSize; + + // If we got fewer results than batchSize, we're done + if (projects.length < batchSize) { + hasMore = false; + } + } + + debugServer(`Projects search index sync completed. Total synced: ${totalSynced}`); } catch (e) { - debugError(e); - return res.status(500).send({ - err: true, - errMsg: conductorErrors.err6, - }); + debugError("Error in syncProjectsInBackground:", e); + throw e; } } diff --git a/server/api/services/qdrant.ts b/server/api/services/qdrant.ts index d084b2eb..885d0d5f 100644 --- a/server/api/services/qdrant.ts +++ b/server/api/services/qdrant.ts @@ -1,11 +1,9 @@ // server/services/qdrant.ts -import { QdrantClient } from '@qdrant/js-client-rest'; -import OpenAI from 'openai'; +import { QdrantClient } from "@qdrant/js-client-rest"; +import OpenAI from "openai"; const qdrantUrl = - process.env.QDRANT_URL || - process.env.QDRANT_HOST || - 'http://localhost:6333'; + process.env.QDRANT_URL || process.env.QDRANT_HOST || "http://localhost:6333"; const qdrantClient = new QdrantClient({ url: qdrantUrl, @@ -17,47 +15,42 @@ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY!, }); -console.log('Testing Qdrant connection...'); - async function testQdrantConnection() { - try { - console.log('Testing Qdrant connection Again...'); - // Test connection by listing collections - const collections = await qdrantClient.getCollections(); - console.log('✅ Qdrant connection OK'); - } catch (error: any) { - console.error('❌ Failed to connect to Qdrant'); - console.error('Error message:', error.message); - if (error.cause) console.error('Cause:', error.cause); - } + try { + console.log("Testing Qdrant connection Again..."); + // Test connection by listing collections + const collections = await qdrantClient.getCollections(); + console.log("✅ Qdrant connection OK"); + } catch (error: any) { + console.error("❌ Failed to connect to Qdrant"); + console.error("Error message:", error.message); + if (error.cause) console.error("Cause:", error.cause); } - - // Run connection test - await testQdrantConnection(); +} export class QdrantService { - private collectionName = 'kb_pages'; + private collectionName = "kb_pages"; private vectorSize = 1536; // OpenAI text-embedding-3-small dimension // Initialize Qdrant collection async initializeCollection() { try { // Check if collection exists - console.log('Checking if collection exists ...'); + console.log("Checking if collection exists ..."); const collections = await qdrantClient.getCollections(); - console.log('collections', collections); + console.log("collections", collections); const collectionExists = collections.collections.some( - col => col.name === this.collectionName + (col) => col.name === this.collectionName, ); - console.log('collectionExists', collectionExists); + console.log("collectionExists", collectionExists); if (!collectionExists) { console.log(`Creating Qdrant collection: ${this.collectionName}`); - + await qdrantClient.createCollection(this.collectionName, { vectors: { size: this.vectorSize, - distance: 'Cosine', // Use cosine similarity + distance: "Cosine", // Use cosine similarity }, optimizers_config: { default_segment_number: 2, @@ -65,14 +58,14 @@ export class QdrantService { replication_factor: 1, }); - console.log('Collection created successfully'); + console.log("Collection created successfully"); } else { - console.log('Collection already exists'); + console.log("Collection already exists"); } return true; } catch (error) { - console.error('Error initializing Qdrant collection:', error); + console.error("Error initializing Qdrant collection:", error); throw error; } } @@ -82,18 +75,18 @@ export class QdrantService { try { // Clean HTML content to plain text const cleanText = text - .replace(/<[^>]*>/g, ' ') - .replace(/\s+/g, ' ') + .replace(/<[^>]*>/g, " ") + .replace(/\s+/g, " ") .trim(); const response = await openai.embeddings.create({ - model: 'text-embedding-3-small', + model: "text-embedding-3-small", input: cleanText, }); return response.data[0].embedding; } catch (error) { - console.error('Error generating embeddings:', error); + console.error("Error generating embeddings:", error); throw error; } } @@ -118,7 +111,10 @@ export class QdrantService { createdAt: kbPage.createdAt, updatedAt: kbPage.updatedAt, // Store clean text for better search - cleanText: kbPage.body.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim(), + cleanText: kbPage.body + .replace(/<[^>]*>/g, " ") + .replace(/\s+/g, " ") + .trim(), }, }; @@ -130,36 +126,42 @@ export class QdrantService { return { success: true, uuid: kbPage.uuid }; } catch (error) { console.error(`Error upserting KB page ${kbPage.uuid}:`, error); - return { success: false, uuid: kbPage.uuid, error: (error as Error).message }; + return { + success: false, + uuid: kbPage.uuid, + error: (error as Error).message, + }; } } // Batch upsert multiple KB pages async batchUpsertKBPages(kbPages: any[], batchSize: number = 10) { const results: any[] = []; - + for (let i = 0; i < kbPages.length; i += batchSize) { const batch = kbPages.slice(i, i + batchSize); - console.log(`Processing batch ${Math.floor(i/batchSize) + 1}/${Math.ceil(kbPages.length/batchSize)}`); + console.log( + `Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(kbPages.length / batchSize)}`, + ); - const batchPromises = batch.map(page => this.upsertKBPage(page)); + const batchPromises = batch.map((page) => this.upsertKBPage(page)); const batchResults = await Promise.allSettled(batchPromises); - + batchResults.forEach((result, index) => { - if (result.status === 'fulfilled') { + if (result.status === "fulfilled") { results.push(result.value); } else { results.push({ success: false, uuid: batch[index].uuid, - error: result.reason?.message || 'Unknown error' + error: result.reason?.message || "Unknown error", }); } }); // Add delay to avoid rate limiting if (i + batchSize < kbPages.length) { - await new Promise(resolve => setTimeout(resolve, 1000)); + await new Promise((resolve) => setTimeout(resolve, 1000)); } } @@ -178,14 +180,14 @@ export class QdrantService { filter: filter || { must: [ { - key: 'status', - match: { value: 'published' } - } - ] + key: "status", + match: { value: "published" }, + }, + ], }, }); - return searchResult.map(point => ({ + return searchResult.map((point) => ({ uuid: point.payload?.uuid, title: point.payload?.title, description: point.payload?.description, @@ -195,7 +197,7 @@ export class QdrantService { cleanText: point.payload?.cleanText, })); } catch (error) { - console.error('Error searching Qdrant:', error); + console.error("Error searching Qdrant:", error); throw error; } } @@ -206,7 +208,7 @@ export class QdrantService { const info = await qdrantClient.getCollection(this.collectionName); return info; } catch (error) { - console.error('Error getting collection info:', error); + console.error("Error getting collection info:", error); throw error; } } @@ -226,4 +228,4 @@ export class QdrantService { } } -export const qdrantService = new QdrantService(); \ No newline at end of file +export const qdrantService = new QdrantService();