From 72a94eeb13ce6317e4123ec0f3e03bfb50c0a146 Mon Sep 17 00:00:00 2001 From: Al-Ameen Ogundiran Date: Sun, 5 Oct 2025 05:57:24 +0100 Subject: [PATCH 01/20] fix prestart errors --- .../10368f38610b_fix_delete_document_error.py | 17 ++++------------- backend/scripts/prestart.sh | 3 +-- docker-compose.override.yml | 1 + frontend/src/runtime-config.ts | 7 ++++--- 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/backend/app/alembic/versions/10368f38610b_fix_delete_document_error.py b/backend/app/alembic/versions/10368f38610b_fix_delete_document_error.py index a1e3163..149f7ec 100644 --- a/backend/app/alembic/versions/10368f38610b_fix_delete_document_error.py +++ b/backend/app/alembic/versions/10368f38610b_fix_delete_document_error.py @@ -18,25 +18,16 @@ def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('chat', - sa.Column('message', sqlmodel.sql.sqltypes.AutoString(length=1024), nullable=True), - sa.Column('is_system', sa.Boolean(), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=False), - sa.Column('id', sa.Uuid(), nullable=False), - sa.Column('course_id', sa.Uuid(), nullable=False), - sa.ForeignKeyConstraint(['course_id'], ['course.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('id') - ) + # This migration originally (incorrectly) re-created the 'chat' table. + # The chat table is created in migration '6e308b39ff60_add_chat_table'. + # Keep only the intended FK change for 'quizattempt'. op.drop_constraint(op.f('quizattempt_quiz_id_fkey'), 'quizattempt', type_='foreignkey') op.create_foreign_key(None, 'quizattempt', 'quiz', ['quiz_id'], ['id'], ondelete='CASCADE') # ### end Alembic commands ### def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### + # Revert the FK change only; do not drop 'chat' which belongs to a prior migration. op.drop_constraint(None, 'quizattempt', type_='foreignkey') op.create_foreign_key(op.f('quizattempt_quiz_id_fkey'), 'quizattempt', 'quiz', ['quiz_id'], ['id']) - op.drop_table('chat') # ### end Alembic commands ### diff --git a/backend/scripts/prestart.sh b/backend/scripts/prestart.sh index 81094f9..480789c 100644 --- a/backend/scripts/prestart.sh +++ b/backend/scripts/prestart.sh @@ -6,8 +6,7 @@ set -x # Let the DB start python app/backend_pre_start.py -# Run migrations alembic upgrade head # Create initial data in DB -python app/initial_data.py \ No newline at end of file +python app/initial_data.py diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 871d206..036ad1a 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -97,6 +97,7 @@ services: environment: NODE_ENV: development NEXT_PUBLIC_BACKEND_BASE_URL: http://localhost:8000 + NEXT_INTERNAL_BACKEND_BASE_URL: http://backend:8000 depends_on: - backend volumes: diff --git a/frontend/src/runtime-config.ts b/frontend/src/runtime-config.ts index 30734d6..3684cc2 100644 --- a/frontend/src/runtime-config.ts +++ b/frontend/src/runtime-config.ts @@ -4,8 +4,9 @@ import type { CreateClientConfig } from './client/client.gen' export const createClientConfig: CreateClientConfig =(config) => { const isServer = typeof window === 'undefined' - const baseURL = - process.env.NEXT_PUBLIC_BACKEND_BASE_URL ?? 'http://localhost:8000' + const baseURL = isServer + ? (process.env.NEXT_INTERNAL_BACKEND_BASE_URL ?? 
'http://backend:8000') + : (process.env.NEXT_PUBLIC_BACKEND_BASE_URL ?? 'http://localhost:8000') return { ...config, @@ -26,4 +27,4 @@ export const createClientConfig: CreateClientConfig =(config) => { return undefined }, } -} \ No newline at end of file +} From b3b33738339c020cedaaeac8744eb44a3f7e37f3 Mon Sep 17 00:00:00 2001 From: Al-Ameen Ogundiran Date: Sun, 5 Oct 2025 16:09:11 +0100 Subject: [PATCH 02/20] hold fixes to chat --- backend/app/api/routes/chat.py | 11 +- backend/app/api/routes/documents.py | 23 +++- backend/app/schemas/public.py | 6 +- backend/app/services/chat_cache.py | 13 ++- backend/app/services/chat_service.py | 22 +++- backend/app/services/rag_service.py | 104 ++++++++++++++++-- frontend/src/app/layout.tsx | 2 +- .../src/components/quiz/quiz-attempts.tsx | 7 +- 8 files changed, 166 insertions(+), 22 deletions(-) diff --git a/backend/app/api/routes/chat.py b/backend/app/api/routes/chat.py index 17029c2..2d44a0a 100644 --- a/backend/app/api/routes/chat.py +++ b/backend/app/api/routes/chat.py @@ -1,6 +1,7 @@ import uuid from collections.abc import AsyncGenerator +import logging from fastapi import APIRouter from fastapi.responses import StreamingResponse from pydantic import BaseModel @@ -11,6 +12,7 @@ from app.services.chat_db import verify_course_access, get_all_messages, create_greeting_if_needed router = APIRouter(prefix="/chat", tags=["chat"]) +logger = logging.getLogger(__name__) class ChatMessage(BaseModel): @@ -79,6 +81,13 @@ async def stream_chat( Returns: Streaming response of AI-generated content """ + logger.info( + "[API] /chat/%s/stream | continue=%s | user_id=%s | preview=%s", + str(course_id), + chat.continue_response, + str(current_user.id), + chat.message[:120], + ) return StreamingResponse( generate_chat_response( chat.message, @@ -139,4 +148,4 @@ async def get_chat_history( return [] # Convert to ChatPublic - return [ChatPublic(**msg.model_dump()) for msg in messages] \ No newline at end of file + return [ChatPublic(**msg.model_dump()) for msg in messages] diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index b9bc700..291cfe3 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -4,6 +4,7 @@ import tempfile import uuid from asyncio.log import logger +import logging from datetime import datetime, timezone from typing import Any @@ -36,7 +37,8 @@ MAX_FILE_SIZE_MB = 25 MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 -pc = Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV_NAME) +pc = Pinecone(api_key=PINECONE_API_KEY) +log = logging.getLogger(__name__) task_status: dict[str, str] = {} @@ -46,8 +48,15 @@ def ensure_index_exists(): """Ensure Pinecone index exists with the correct dimension, recreate if wrong.""" if pc.has_index(index_name): + log.info("[DOCS] Pinecone index exists | name=%s", index_name) existing = pc.describe_index(index_name) if existing.dimension != EXPECTED_DIMENSION: + log.warning( + "[DOCS] Index dimension mismatch | name=%s | have=%s want=%s — recreating", + index_name, + existing.dimension, + EXPECTED_DIMENSION, + ) pc.delete_index(index_name) pc.create_index( name=index_name, @@ -56,6 +65,7 @@ def ensure_index_exists(): spec=ServerlessSpec(cloud="aws", region="us-east-1"), ) else: + log.info("[DOCS] Creating Pinecone index | name=%s dim=%s", index_name, EXPECTED_DIMENSION) pc.create_index( name=index_name, dimension=EXPECTED_DIMENSION, @@ -165,6 +175,7 @@ async def process_pdf_task(file_path: str, document_id: uuid.UUID, session: Sess 
"id": embedding_uuid, "values": embedding, "metadata": { + "course_id": str(document.course_id), "document_id": str(document_id), "chunk_id": str(record.id), "text": record.text_content, @@ -176,6 +187,16 @@ async def process_pdf_task(file_path: str, document_id: uuid.UUID, session: Sess session.commit() index = pc.Index(index_name) + log.info( + "[DOCS] Upserting vectors | index=%s | count=%d | course_id=%s | document_id=%s", + index_name, + len(vectors_to_upsert), + str(document.course_id), + str(document_id), + ) + if vectors_to_upsert: + sample_meta = vectors_to_upsert[0].get("metadata", {}) + log.info("[DOCS] Sample vector metadata keys=%s", list(sample_meta.keys())) index.upsert(vectors=vectors_to_upsert) document.updated_at = datetime.now(timezone.utc) diff --git a/backend/app/schemas/public.py b/backend/app/schemas/public.py index e17d451..3cdede4 100644 --- a/backend/app/schemas/public.py +++ b/backend/app/schemas/public.py @@ -196,10 +196,12 @@ class QuizSessionPublicWithResults(QuizSessionPublicWithQuizzes): results: list[QuizAttemptPublic] = Field(default_factory=list) -class ChatPublic(QuizSessionPublic): +class ChatPublic(PydanticBase): + """Public schema for Chat message entries (no quiz fields).""" + id: uuid.UUID - message: str course_id: uuid.UUID + message: str | None = None is_system: bool created_at: datetime updated_at: datetime diff --git a/backend/app/services/chat_cache.py b/backend/app/services/chat_cache.py index 6fdb53f..59c2459 100644 --- a/backend/app/services/chat_cache.py +++ b/backend/app/services/chat_cache.py @@ -1,6 +1,7 @@ """ Chat response caching service """ +import logging import uuid import numpy as np from typing import Optional, Tuple, List @@ -10,6 +11,8 @@ from app.models.chat import Chat from app.api.deps import SessionDep +logger = logging.getLogger(__name__) + # Caching constants SIMILARITY_THRESHOLD = 0.85 # Minimum similarity for cache hit MAX_CACHE_ENTRIES = 100 # Maximum cached responses per course @@ -57,6 +60,7 @@ async def check_cached_response( recent_pairs[i+1].message): pairs.append((recent_pairs[i].message, recent_pairs[i+1].message)) + logger.debug("[CACHE] Checking %d recent QA pairs for similarity", len(pairs)) # Check similarity with recent questions for cached_question, cached_response in pairs[:MAX_CACHE_ENTRIES]: try: @@ -71,10 +75,15 @@ async def check_cached_response( similarity = cosine_similarity(question_embedding, cached_embedding) if similarity >= SIMILARITY_THRESHOLD: + logger.info( + "[CACHE] Cache hit | similarity=%.3f | threshold=%.3f", + similarity, + SIMILARITY_THRESHOLD, + ) return cached_response, cached_question except Exception as e: - print(f"Error checking cache similarity: {e}") + logger.exception("[CACHE] Error checking cache similarity: %s", e) continue - return None \ No newline at end of file + return None diff --git a/backend/app/services/chat_service.py b/backend/app/services/chat_service.py index a3ab705..a5f77ee 100644 --- a/backend/app/services/chat_service.py +++ b/backend/app/services/chat_service.py @@ -1,6 +1,7 @@ """ Main chat service that orchestrates all chat functionality """ +import logging import uuid from collections.abc import AsyncGenerator from typing import List @@ -23,6 +24,8 @@ from app.services.rag_service import get_question_embedding, retrieve_relevant_context from app.services.openai_service import stream_cached_response, generate_openai_response +logger = logging.getLogger(__name__) + async def handle_continuation( course_id: uuid.UUID, @@ -32,11 +35,17 @@ async def 
handle_continuation( """Handle response continuation logic""" # Verify access course = verify_course_access(course_id, session, current_user) + logger.info( + "[CHAT] Continuation requested | course_id=%s | user_id=%s", + str(course_id), + str(current_user.id), + ) # Get the last system message to continue from last_system_msg = get_last_system_message(course_id, session) if not last_system_msg or not last_system_msg.message: + logger.warning("[CHAT] No previous system message found to continue | course_id=%s", str(course_id)) yield "Error: No previous response found to continue" return @@ -97,9 +106,16 @@ async def handle_regular_question( """Handle regular question processing with RAG and caching""" # Verify access course = verify_course_access(course_id, session, current_user) + logger.info( + "[CHAT] Question received | course_id=%s | user_id=%s | question_preview=%s", + str(course_id), + str(current_user.id), + question[:120], + ) # Generate embedding for the question question_embedding = await get_question_embedding(question) + logger.debug("[CHAT] Question embedding dims=%d", len(question_embedding)) # Check for cached similar response first cached_result = await check_cached_response( @@ -108,6 +124,7 @@ async def handle_regular_question( if cached_result: cached_response, _ = cached_result + logger.info("[CHAT] Cache hit for similar question | course_id=%s", str(course_id)) # Save user message save_user_message(question, course_id, session) @@ -124,11 +141,13 @@ async def handle_regular_question( context_str = await retrieve_relevant_context(question_embedding, course_id) if not context_str: + logger.warning("[CHAT] No relevant context found | course_id=%s", str(course_id)) yield "Error: No relevant content found for this question" return # Get recent chat history for conversational context recent_messages = get_recent_messages(course_id, session) + logger.debug("[CHAT] Recent messages count=%d", len(recent_messages)) # Filter history based on token limits conversation_history = filter_chat_history( @@ -165,4 +184,5 @@ async def handle_regular_question( yield chunk # Save system message - save_system_message(full_response, course_id, session) \ No newline at end of file + save_system_message(full_response, course_id, session) + logger.info("[CHAT] Response saved | chars=%d | course_id=%s", len(full_response), str(course_id)) diff --git a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py index 29f063f..1de4bbf 100644 --- a/backend/app/services/rag_service.py +++ b/backend/app/services/rag_service.py @@ -1,6 +1,7 @@ """ RAG (Retrieval-Augmented Generation) service for document context retrieval """ +import logging import uuid from typing import List, Optional @@ -11,14 +12,18 @@ pc, ) +logger = logging.getLogger(__name__) async def get_question_embedding(question: str) -> List[float]: """Generate embedding for a question""" + logger.info("[RAG] Generating question embedding | model=%s | question_preview=%s", EMBEDDING_MODEL, question[:120]) embed_resp = await async_openai_client.embeddings.create( input=[question], model=EMBEDDING_MODEL, ) - return embed_resp.data[0].embedding + embedding = embed_resp.data[0].embedding + logger.debug("[RAG] Embedding generated | dims=%d | first5=%s", len(embedding), embedding[:5]) + return embedding async def retrieve_relevant_context( @@ -38,26 +43,101 @@ async def retrieve_relevant_context( Concatenated context string or None if no relevant content found """ try: + # Ensure index exists and log setup + has_idx = 
pc.has_index(index_name) + logger.info( + "[RAG] Query Pinecone | index=%s exists=%s | filter.course_id=%s | top_k=%d | embed_dims=%d", + index_name, + has_idx, + str(course_id), + top_k, + len(question_embedding), + ) + if not has_idx: + logger.warning("[RAG] Index %s does not exist before query", index_name) + # Query Pinecone for relevant chunks index = pc.Index(index_name) query_result = index.query( vector=question_embedding, - filter={"course_id": str(course_id)}, + filter={"course_id": {"$eq": str(course_id)}}, top_k=top_k, include_metadata=True, ) - - contexts = [ - match["metadata"]["text"] - for match in query_result["matches"] - if "metadata" in match and "text" in match["metadata"] - ] + + # Pinecone may return either an object or dict-like structure + matches = query_result.get("matches", []) if hasattr(query_result, "get") else getattr(query_result, "matches", []) + logger.info("[RAG] Pinecone returned %d matches", len(matches) if matches is not None else 0) + + contexts: List[str] = [] + if matches: + for i, m in enumerate(matches[:min(5, len(matches))]): + # tolerate different shapes (dict or object) + score = m.get("score") if isinstance(m, dict) else getattr(m, "score", None) + metadata = m.get("metadata") if isinstance(m, dict) else getattr(m, "metadata", {}) + text = metadata.get("text") if isinstance(metadata, dict) else None + cid = metadata.get("course_id") if isinstance(metadata, dict) else None + did = metadata.get("document_id") if isinstance(metadata, dict) else None + contexts.append(text) if text else None + logger.debug( + "[RAG] match[%d] | score=%s | course_id=%s | document_id=%s | text_len=%s", + i, + f"{score:.4f}" if isinstance(score, (int, float)) else str(score), + str(cid), + str(did), + len(text) if isinstance(text, str) else 0, + ) if not contexts: + # Additional debug: try an unfiltered query to inspect stored metadata + try: + probe = index.query( + vector=question_embedding, + top_k=1, + include_metadata=True, + ) + probe_matches = probe.get("matches", []) if hasattr(probe, "get") else getattr(probe, "matches", []) + logger.info("[RAG] Probe (no-filter) returned %d matches", len(probe_matches) if probe_matches is not None else 0) + if probe_matches: + pm = probe_matches[0] + pmeta = pm.get("metadata") if isinstance(pm, dict) else getattr(pm, "metadata", {}) + logger.info( + "[RAG] Probe match metadata keys=%s | has_course_id=%s", + list(pmeta.keys()) if isinstance(pmeta, dict) else str(type(pmeta)), + isinstance(pmeta, dict) and ("course_id" in pmeta), + ) + # Fallback: if vectors exist but filter produced none, try manual filtering in code + # to guard against filter-shape mismatches across SDK versions + fallback = index.query( + vector=question_embedding, + top_k=max(10, top_k), + include_metadata=True, + ) + fb_matches = fallback.get("matches", []) if hasattr(fallback, "get") else getattr(fallback, "matches", []) + fb_contexts: List[str] = [] + for m in fb_matches: + md = m.get("metadata") if isinstance(m, dict) else getattr(m, "metadata", {}) + text = md.get("text") if isinstance(md, dict) else None + cid = md.get("course_id") if isinstance(md, dict) else None + if text and str(cid) == str(course_id): + fb_contexts.append(text) + if fb_contexts: + merged_fb = "\n\n".join(fb_contexts[:top_k]) + logger.info("[RAG] Fallback produced %d contexts after manual course_id filter", len(fb_contexts)) + return merged_fb + except Exception as pe: + logger.exception("[RAG] Probe query failed: %s", pe) + logger.warning( + "[RAG] No contexts found from 
Pinecone | index=%s | course_id=%s", + index_name, + str(course_id), + ) return None - return "\n\n".join(contexts) - + merged = "\n\n".join(contexts) + logger.info("[RAG] Aggregated context length=%d", len(merged)) + return merged + except Exception as e: - print(f"Error retrieving context: {e}") - return None \ No newline at end of file + logger.exception("[RAG] Error retrieving context from Pinecone: %s", e) + return None diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx index 752d9e1..303ea02 100644 --- a/frontend/src/app/layout.tsx +++ b/frontend/src/app/layout.tsx @@ -28,7 +28,7 @@ export default async function RootLayout({ children: React.ReactNode }) { return ( - +
{children}
diff --git a/frontend/src/components/quiz/quiz-attempts.tsx b/frontend/src/components/quiz/quiz-attempts.tsx index 0ab8e19..dd1e673 100644 --- a/frontend/src/components/quiz/quiz-attempts.tsx +++ b/frontend/src/components/quiz/quiz-attempts.tsx @@ -37,9 +37,12 @@ export default async function QuizAttempts({courseId}: {courseId: string}) {
{result.data.map((attempt, idx) => ( - +
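A note on the RAG change in this patch: the Pinecone metadata filter now uses the explicit $eq operator, and the match handling tolerates both dict-shaped and object-shaped query results across client versions. A minimal sketch of that query path, assuming the v3 Pinecone client already used in this repo and the metadata keys written at upsert time (course_id, document_id, text); the API key and index name below are illustrative:

    from pinecone import Pinecone

    pc = Pinecone(api_key="...")        # illustrative; the app reads PINECONE_API_KEY
    index = pc.Index("document-index")  # illustrative index name

    def query_course_chunks(embedding: list[float], course_id: str, top_k: int = 5) -> list[str]:
        # Scope the similarity search to a single course via metadata filtering.
        result = index.query(
            vector=embedding,
            filter={"course_id": {"$eq": course_id}},
            top_k=top_k,
            include_metadata=True,
        )
        # Accept dict- or object-shaped responses, as the patched rag_service does.
        matches = result.get("matches", []) if hasattr(result, "get") else getattr(result, "matches", [])
        texts: list[str] = []
        for m in matches:
            md = m.get("metadata") if isinstance(m, dict) else getattr(m, "metadata", {})
            if isinstance(md, dict) and md.get("text"):
                texts.append(md["text"])
        return texts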
From d9a78a1fd9a715467ecc74e95b9c2c9a7d62fdbe Mon Sep 17 00:00:00 2001 From: Al-Ameen Ogundiran Date: Mon, 6 Oct 2025 03:31:45 +0100 Subject: [PATCH 03/20] implement mvp podcast --- .env.example | 17 +- .../2042a1f0c0a1_add_podcast_table.py | 38 ++ backend/app/api/main.py | 2 + backend/app/api/routes/documents.py | 17 + backend/app/api/routes/podcasts.py | 151 +++++++ backend/app/core/config.py | 11 + backend/app/models/__init__.py | 1 + backend/app/models/course.py | 7 + backend/app/models/podcast.py | 30 ++ backend/app/schemas/public.py | 21 + backend/app/services/podcast_service.py | 368 +++++++++++++++++ backend/app/services/rag_service.py | 10 +- backend/pyproject.toml | 1 + .../dashboard/courses/[id]/page.tsx | 5 +- .../documents/by-course/[courseId]/route.ts | 47 +++ .../app/api/v1/podcasts/[courseId]/route.ts | 64 +++ .../v1/podcasts/audio/[podcastId]/route.ts | 55 +++ .../v1/podcasts/by-id/[podcastId]/route.ts | 35 ++ frontend/src/components/podcast.tsx | 373 ++++++++++++++++++ frontend/src/lib/api-config.ts | 34 ++ frontend/src/lib/podcast-service.ts | 232 +++++++++++ frontend/src/lib/podcast-ui.ts | 36 ++ 22 files changed, 1548 insertions(+), 7 deletions(-) create mode 100644 backend/app/alembic/versions/2042a1f0c0a1_add_podcast_table.py create mode 100644 backend/app/api/routes/podcasts.py create mode 100644 backend/app/models/podcast.py create mode 100644 backend/app/services/podcast_service.py create mode 100644 frontend/src/app/api/v1/documents/by-course/[courseId]/route.ts create mode 100644 frontend/src/app/api/v1/podcasts/[courseId]/route.ts create mode 100644 frontend/src/app/api/v1/podcasts/audio/[podcastId]/route.ts create mode 100644 frontend/src/app/api/v1/podcasts/by-id/[podcastId]/route.ts create mode 100644 frontend/src/components/podcast.tsx create mode 100644 frontend/src/lib/api-config.ts create mode 100644 frontend/src/lib/podcast-service.ts create mode 100644 frontend/src/lib/podcast-ui.ts diff --git a/.env.example b/.env.example index e1e9179..046f014 100644 --- a/.env.example +++ b/.env.example @@ -50,4 +50,19 @@ PINECONE_API_KEY=changethis OPENAI_API_KEY=changethis -NEXT_PUBLIC_BACKEND_BASE_URL=http://localhost:8000 \ No newline at end of file +NEXT_PUBLIC_BACKEND_BASE_URL=http://localhost:8000 + +NEXT_INTERNAL_BACKEND_BASE_URL=http://backend:8000 + +# Podcast storage configuration +# "local" will store files under backend container at /app/podcasts +# "s3" will upload to an S3 bucket using the credentials below +PODCAST_STORAGE=local +PODCAST_LOCAL_DIR=/app/podcasts +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION= +S3_BUCKET_NAME= +S3_PREFIX=podcasts/ +PODCAST_TEACHER_VOICE=coral +PODCAST_STUDENT_VOICE=alloy diff --git a/backend/app/alembic/versions/2042a1f0c0a1_add_podcast_table.py b/backend/app/alembic/versions/2042a1f0c0a1_add_podcast_table.py new file mode 100644 index 0000000..fa7bdd5 --- /dev/null +++ b/backend/app/alembic/versions/2042a1f0c0a1_add_podcast_table.py @@ -0,0 +1,38 @@ +"""add podcast table + +Revision ID: 2042a1f0c0a1 +Revises: 10368f38610b +Create Date: 2025-10-05 06:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa +import sqlmodel.sql.sqltypes + + +# revision identifiers, used by Alembic. 
+revision = '2042a1f0c0a1' +down_revision = '2cde6f094a4e' +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + 'podcast', + sa.Column('id', sa.Uuid(), nullable=False), + sa.Column('course_id', sa.Uuid(), nullable=False), + sa.Column('title', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=False), + sa.Column('transcript', sa.Text(), nullable=False), + sa.Column('audio_path', sqlmodel.sql.sqltypes.AutoString(length=1024), nullable=False), + sa.Column('storage_backend', sqlmodel.sql.sqltypes.AutoString(length=50), nullable=False), + sa.Column('duration_seconds', sa.Float(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['course_id'], ['course.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + + +def downgrade(): + op.drop_table('podcast') diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 558f556..9d6a397 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -5,6 +5,7 @@ courses, documents, items, + podcasts, login, private, quiz_sessions, @@ -21,6 +22,7 @@ api_router.include_router(courses.router) api_router.include_router(chat.router) api_router.include_router(documents.router) +api_router.include_router(podcasts.router) api_router.include_router(quiz_sessions.router) if settings.ENVIRONMENT == "local": diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py index 291cfe3..4acf3a5 100644 --- a/backend/app/api/routes/documents.py +++ b/backend/app/api/routes/documents.py @@ -311,6 +311,23 @@ async def process_multiple_documents( return {"message": "Processing started for multiple files", "documents": results} +@router.get("/by-course/{course_id}") +def list_documents_by_course(session: SessionDep, current_user: CurrentUser, course_id: uuid.UUID) -> Any: + """List documents for a course with basic fields.""" + docs = session.exec(select(Document).where(Document.course_id == course_id)).all() + return [ + { + "id": d.id, + "title": d.title, + "filename": d.filename, + "status": d.status.value if hasattr(d.status, 'value') else str(d.status), + "created_at": d.created_at, + "updated_at": d.updated_at, + } + for d in docs + ] + + @router.get("/{id}", response_model=Document) def read_document(session: SessionDep, current_user: CurrentUser, id: uuid.UUID) -> Any: """Get a document by its ID, ensuring the user has permissions.""" diff --git a/backend/app/api/routes/podcasts.py b/backend/app/api/routes/podcasts.py new file mode 100644 index 0000000..4565799 --- /dev/null +++ b/backend/app/api/routes/podcasts.py @@ -0,0 +1,151 @@ +import os +import uuid +from typing import Any, Optional, List + +from fastapi import APIRouter, HTTPException +from fastapi.responses import StreamingResponse, JSONResponse +from pydantic import BaseModel + +from app.api.deps import CurrentUser, SessionDep +from sqlmodel import select +from sqlalchemy.orm import selectinload +from app.core.config import settings +from app.models.podcast import Podcast +from app.schemas.public import PodcastPublic, PodcastsPublic +from app.services.podcast_service import generate_podcast_for_course + +router = APIRouter(prefix="/podcasts", tags=["podcasts"]) + + +@router.get("/{course_id}", response_model=PodcastsPublic) +def list_podcasts(course_id: uuid.UUID, session: SessionDep, current_user: CurrentUser) -> Any: + pods = 
session.exec(select(Podcast).where(Podcast.course_id == course_id)).all() + return PodcastsPublic(data=[PodcastPublic.model_validate(p) for p in pods]) + + +class GeneratePodcastRequest(BaseModel): + title: Optional[str] = None + mode: Optional[str] = None # 'dialogue' | 'presentation' + topics: Optional[str] = None + teacher_voice: Optional[str] = None + student_voice: Optional[str] = None + narrator_voice: Optional[str] = None + document_ids: Optional[List[uuid.UUID]] = None + + +@router.post("/{course_id}/generate", response_model=PodcastPublic) +async def generate_podcast( + course_id: uuid.UUID, + session: SessionDep, + current_user: CurrentUser, + body: GeneratePodcastRequest | None = None, +) -> Any: + if not body or not body.title or not body.title.strip(): + raise HTTPException(status_code=422, detail="Title is required") + title = body.title.strip() + mode = (body.mode or "dialogue") if body else "dialogue" + topics = body.topics if body else None + teacher_voice = body.teacher_voice if body and body.teacher_voice else settings.PODCAST_TEACHER_VOICE + student_voice = body.student_voice if body and body.student_voice else settings.PODCAST_STUDENT_VOICE + narrator_voice = body.narrator_voice if body and body.narrator_voice else settings.PODCAST_TEACHER_VOICE + doc_ids = body.document_ids if body and body.document_ids else None + podcast = await generate_podcast_for_course( + session, + course_id, + title, + teacher_voice, + student_voice, + narrator_voice, + mode, + topics, + doc_ids, + ) + return PodcastPublic.model_validate(podcast) + + +@router.get("/by-id/{podcast_id}", response_model=PodcastPublic) +def get_podcast(podcast_id: uuid.UUID, session: SessionDep, current_user: CurrentUser) -> Any: + pod = session.get(Podcast, podcast_id) + if not pod: + raise HTTPException(status_code=404, detail="Podcast not found") + return PodcastPublic.model_validate(pod) + + +@router.get("/by-id/{podcast_id}/audio") +def stream_audio(podcast_id: uuid.UUID, session: SessionDep, current_user: CurrentUser): + pod = session.get(Podcast, podcast_id) + if not pod: + raise HTTPException(status_code=404, detail="Podcast not found") + if pod.storage_backend == "local": + file_path = pod.audio_path + if not os.path.exists(file_path): + raise HTTPException(status_code=404, detail="Audio file missing") + def iterfile(): + with open(file_path, "rb") as f: + while chunk := f.read(8192): + yield chunk + return StreamingResponse(iterfile(), media_type="audio/mpeg") + else: + # For S3, return a presigned URL to let client fetch directly + try: + import boto3 + s3 = boto3.client( + "s3", + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_REGION, + ) + bucket = settings.S3_BUCKET_NAME + if not bucket: + raise ValueError("S3 bucket not configured") + key = pod.audio_path.replace(f"s3://{bucket}/", "") if pod.audio_path.startswith("s3://") else pod.audio_path + url = s3.generate_presigned_url( + ClientMethod='get_object', + Params={'Bucket': bucket, 'Key': key}, + ExpiresIn=3600, + ) + return JSONResponse({"url": url}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to generate S3 URL: {e}") + + +@router.delete("/by-id/{podcast_id}") +def delete_podcast(podcast_id: uuid.UUID, session: SessionDep, current_user: CurrentUser) -> Any: + pod = session.exec( + select(Podcast).where(Podcast.id == podcast_id).options(selectinload(Podcast.course)) # type: ignore + ).first() + + if not pod: + raise 
HTTPException(status_code=404, detail="Podcast not found") + + # Permission: owner or superuser + if not current_user.is_superuser and getattr(pod, "course", None) and pod.course.owner_id != current_user.id: # type: ignore + raise HTTPException(status_code=403, detail="Not enough permissions to delete this podcast") + + # Best-effort delete of underlying media + try: + if pod.storage_backend == "local" and pod.audio_path and os.path.exists(pod.audio_path): + try: + os.remove(pod.audio_path) + except Exception: + pass + elif pod.storage_backend == "s3" and pod.audio_path: + try: + import boto3 + bucket = settings.S3_BUCKET_NAME + if bucket: + key = pod.audio_path.replace(f"s3://{bucket}/", "") if pod.audio_path.startswith("s3://") else pod.audio_path + s3 = boto3.client( + "s3", + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_REGION, + ) + s3.delete_object(Bucket=bucket, Key=key) + except Exception: + # ignore media delete failures + pass + finally: + session.delete(pod) + session.commit() + return {"message": "Podcast deleted successfully"} diff --git a/backend/app/core/config.py b/backend/app/core/config.py index ee13bec..cee66cb 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -93,6 +93,17 @@ def emails_enabled(self) -> bool: FIRST_SUPERUSER: EmailStr FIRST_SUPERUSER_PASSWORD: str + # Podcast/Audio storage settings + PODCAST_STORAGE: Literal["local", "s3"] = "local" + PODCAST_LOCAL_DIR: str = "/app/podcasts" + AWS_ACCESS_KEY_ID: str | None = None + AWS_SECRET_ACCESS_KEY: str | None = None + AWS_REGION: str | None = None + S3_BUCKET_NAME: str | None = None + S3_PREFIX: str = "podcasts/" + PODCAST_TEACHER_VOICE: str = "coral" + PODCAST_STUDENT_VOICE: str = "alloy" + def _check_default_secret(self, var_name: str, value: str | None) -> None: if value == "changethis": message = ( diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index cbb7d88..a3ec609 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -1,4 +1,5 @@ from .chat import Chat # noqa: F401 +from .podcast import Podcast # noqa: F401 from .common import * # noqa: F403, if you have base mixins here from .course import Course # noqa: F401 from .document import Document # noqa: F401 diff --git a/backend/app/models/course.py b/backend/app/models/course.py index c6283a9..c13a580 100644 --- a/backend/app/models/course.py +++ b/backend/app/models/course.py @@ -38,6 +38,13 @@ class Course(CourseBase, table=True): sa_relationship_kwargs={"cascade": "all, delete-orphan"}, ) chats: list["Chat"] = Relationship(back_populates="course") + podcasts: list["Podcast"] = Relationship( + back_populates="course", + sa_relationship_kwargs={ + "cascade": "all, delete-orphan", + "passive_deletes": True, + }, + ) created_at: datetime = Field( default_factory=lambda: datetime.now(timezone.utc), diff --git a/backend/app/models/podcast.py b/backend/app/models/podcast.py new file mode 100644 index 0000000..785791f --- /dev/null +++ b/backend/app/models/podcast.py @@ -0,0 +1,30 @@ +import uuid +from datetime import datetime, timezone + +from sqlalchemy import func +from sqlmodel import Field, Relationship, SQLModel, text + + +class Podcast(SQLModel, table=True): + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + course_id: uuid.UUID = Field(foreign_key="course.id") + title: str + transcript: str + audio_path: str # local path or S3 key/URL depending on storage backend + 
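+    # "local" or "s3"; mirrors settings.PODCAST_STORAGE at creation time and governs how audio_path is interpreted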
storage_backend: str = Field(default="local") + duration_seconds: float | None = None + + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_column_kwargs={"server_default": text("CURRENT_TIMESTAMP")}, + ) + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_column_kwargs={ + "server_default": text("CURRENT_TIMESTAMP"), + "onupdate": func.now(), + }, + ) + + course: "Course" = Relationship(back_populates="podcasts") # noqa: F821 + diff --git a/backend/app/schemas/public.py b/backend/app/schemas/public.py index 3cdede4..00bc2a7 100644 --- a/backend/app/schemas/public.py +++ b/backend/app/schemas/public.py @@ -205,3 +205,24 @@ class ChatPublic(PydanticBase): is_system: bool created_at: datetime updated_at: datetime + + +# ---------------------------------------------------------------------- +# Podcast Schemas +# ---------------------------------------------------------------------- + + +class PodcastPublic(PydanticBase): + id: uuid.UUID + course_id: uuid.UUID + title: str + transcript: str + audio_path: str + storage_backend: str + duration_seconds: float | None = None + created_at: datetime + updated_at: datetime + + +class PodcastsPublic(BaseModel): + data: list[PodcastPublic] diff --git a/backend/app/services/podcast_service.py b/backend/app/services/podcast_service.py new file mode 100644 index 0000000..a135b56 --- /dev/null +++ b/backend/app/services/podcast_service.py @@ -0,0 +1,368 @@ +""" +Podcast generation service: builds a conversational transcript from course materials +and generates audio using OpenAI TTS. Stores audio locally or in S3 based on config. +""" +import io +import tempfile +import os +import uuid +import logging +from typing import Optional, List, Tuple + +import boto3 +from fastapi import HTTPException + +from app.api.routes.documents import async_openai_client +from app.core.config import settings +from app.models.podcast import Podcast +from app.api.deps import SessionDep +from app.services.rag_service import get_question_embedding, retrieve_relevant_context +import json + +logger = logging.getLogger(__name__) + +ALLOWED_VOICES = { + "alloy", "echo", "fable", "onyx", "nova", "shimmer", + "coral", "verse", "ballad", "ash", "sage", "marin", "cedar", +} + +def _sanitize_voice(voice: str, fallback: str) -> str: + v = (voice or "").strip().lower() + if v in ALLOWED_VOICES: + return v + fb = (fallback or settings.PODCAST_TEACHER_VOICE).strip().lower() + logger.warning("[PODCAST] Unsupported voice '%s'. Using fallback '%s'", voice, fb) + return fb if fb in ALLOWED_VOICES else "alloy" + + +PROMPT_TEMPLATE = ( + "You are producing a short conversational podcast between a Teacher and a Student.\n" + "Use the following course context to guide the conversation. The Teacher should explain core ideas clearly,\n" + "and the Student should ask natural, helpful questions someone might have.\n\n" + "Constraints:\n" + "- Keep it concise (2-4 minutes when spoken).\n" + "- Alternate turns: start with 'Teacher:' then 'Student:', etc.\n" + "- Do not reference that you are an AI.\n" + "- Stay grounded strictly in the provided context; avoid speculation.\n\n" + "Context:\n{context}\n\n" + "Output format:\n" + "Teacher: \n" + "Student: \n" + "Teacher: \n" + "... 
(2-6 exchanges total)\n" +) + +# Monologue (presentation) prompt: no speaker tags +PROMPT_MONO_TEMPLATE = ( + "You are producing a concise educational presentation (single narrator).\n" + "Use the following course context to deliver a clear, coherent explanation,\n" + "highlighting key ideas, definitions, examples, and takeaways.\n\n" + "Constraints:\n" + "- Keep it to ~2-4 minutes when spoken.\n" + "- No speaker labels or dialogue.\n" + "- Maintain an engaging, instructive, academic tone.\n" + "- Stay strictly grounded in the provided context; no speculation.\n\n" + "Context:\n{context}\n\n" + "Output: A single continuous narrative (no role tags).\n" +) + + +async def generate_transcript_from_context(context: str, title: str) -> str: + system = ( + "You create engaging, accurate educational dialog scripts suitable for audio narration." + ) + user = PROMPT_TEMPLATE.format(context=context) + resp = await async_openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": f"Title: {title}\n\n{user}"}, + ], + temperature=0.6, + max_tokens=1200, + ) + content = resp.choices[0].message.content or "" + return content.strip() + + +async def generate_presentation_transcript(context: str, title: str) -> str: + system = ( + "You create concise, accurate educational presentations suitable for audio narration." + ) + user = PROMPT_MONO_TEMPLATE.format(context=context) + resp = await async_openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": f"Title: {title}\n\n{user}"}, + ], + temperature=0.6, + max_tokens=1200, + ) + content = resp.choices[0].message.content or "" + return content.strip() + + +async def generate_dialog_turns_from_context(context: str, title: str) -> list[dict]: + """Ask LLM to produce structured dialog JSON: {"turns": [{"role":"teacher"|"student","text":"..."}, ...]}""" + system = ( + "You create concise, accurate educational dialogues grounded in provided context." 
+ " Return ONLY strict JSON with the following structure and nothing else:" + " {\n \"turns\": [ { \"role\": \"teacher\", \"text\": \"...\" }, { \"role\": \"student\", \"text\": \"...\" } ]\n }" + ) + user = ( + "Create a 2-4 minute dialogue alternating roles 'teacher' and 'student'.\n" + "Rules:\n- No extra commentary, return pure JSON.\n- Alternate roles.\n- 4 to 10 total turns.\n- Stay within the context.\n\n" + f"Title: {title}\nContext:\n{context}" + ) + resp = await async_openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ], + temperature=0.6, + max_tokens=1200, + ) + content = resp.choices[0].message.content or "" + try: + data = json.loads(content) + turns = data.get("turns", []) + # Strict: accept only teacher/student roles + cleaned = [] + for t in turns: + role = (t.get("role") or "").lower() + text = (t.get("text") or "").strip() + if role in ("teacher", "student") and text: + cleaned.append({"role": role, "text": text}) + if cleaned: + return cleaned + # If JSON parsed but roles invalid/empty, fall back to text parsing + logger.warning("[PODCAST] Dialog JSON contained no teacher/student roles; falling back to text parsing") + txt = await generate_transcript_from_context(context, title) + segs = _parse_dialog_segments(txt) + return [{"role": r, "text": t} for r, t in segs] + except Exception: + logger.warning("[PODCAST] JSON dialog parse failed; falling back to text parsing") + # fallback to text-based transcript and parsing + txt = await generate_transcript_from_context(context, title) + segs = _parse_dialog_segments(txt) + return [{"role": r, "text": t} for r, t in segs] + + +async def tts_generate_audio(transcript: str) -> bytes: + """Generate TTS audio bytes from transcript using OpenAI TTS.""" + try: + # Prefer streaming API to avoid response shape issues across SDK versions + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp: + tmp_path = tmp.name + try: + safe_voice = _sanitize_voice(settings.PODCAST_TEACHER_VOICE, settings.PODCAST_TEACHER_VOICE) + async with async_openai_client.audio.speech.with_streaming_response.create( + model="gpt-4o-mini-tts", + voice=safe_voice, + input=transcript, + ) as response: + await response.stream_to_file(tmp_path) + with open(tmp_path, "rb") as f: + return f.read() + finally: + try: + os.remove(tmp_path) + except Exception: + pass + except Exception as e: + logger.exception("[PODCAST] TTS generation failed: %s", e) + raise HTTPException(status_code=500, detail=f"TTS generation failed: {e}") + + +def ensure_local_dir(path: str) -> None: + os.makedirs(path, exist_ok=True) + + +def store_audio_local(audio_bytes: bytes, filename: str) -> str: + ensure_local_dir(settings.PODCAST_LOCAL_DIR) + dest_path = os.path.join(settings.PODCAST_LOCAL_DIR, filename) + with open(dest_path, "wb") as f: + f.write(audio_bytes) + return dest_path + + +def store_audio_s3(audio_bytes: bytes, key: str) -> str: + if not settings.S3_BUCKET_NAME or not settings.AWS_REGION: + raise HTTPException(status_code=500, detail="S3 configuration is missing") + s3 = boto3.client( + "s3", + aws_access_key_id=settings.AWS_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY, + region_name=settings.AWS_REGION, + ) + bucket = settings.S3_BUCKET_NAME + s3.put_object(Bucket=bucket, Key=key, Body=audio_bytes, ContentType="audio/mpeg") + # Return an s3:// URL; clients should hit our audio endpoint or a presigned URL + return f"s3://{bucket}/{key}" 
+ + +async def generate_podcast_for_course( + session: SessionDep, + course_id: uuid.UUID, + title: str, + teacher_voice: str, + student_voice: str, + narrator_voice: str, + mode: str = "dialogue", + topics: Optional[str] = None, + document_ids: Optional[list[uuid.UUID]] = None, +) -> Podcast: + # Retrieve broad context for the course by embedding a generic request and pulling top content + focus = f" Provide an overview for course {course_id}." + (f" Focus on: {topics}." if topics else "") + question_embedding = await get_question_embedding(focus) + context = await retrieve_relevant_context(question_embedding, course_id, top_k=8, document_ids=document_ids) + if not context: + raise HTTPException(status_code=400, detail="No relevant content available for podcast") + + if mode == "presentation": + # Single narrator monologue (no role tags) + monologue = await generate_presentation_transcript(context, f"{title} (Presentation Mode)") + audio_bytes = await _tts_to_bytes(monologue, narrator_voice) + transcript = monologue + else: + # Build alternating-speaker audio with different voices using structured turns + turns = await generate_dialog_turns_from_context(context, title) + # Add a blank line between turns for better readability + transcript = "\n\n".join([ + (f"Teacher: {t['text']}" if t['role'] == "teacher" else f"Student: {t['text']}") + for t in turns + ]) + if turns: + temp_files: List[str] = [] + try: + for t in turns: + voice = teacher_voice if t["role"] == "teacher" else student_voice + path = await _tts_to_temp_file(t["text"], voice) + temp_files.append(path) + audio_bytes = _concat_files(temp_files) + finally: + for p in temp_files: + try: + os.remove(p) + except Exception: + pass + else: + audio_bytes = await tts_generate_audio(transcript) + pod_id = uuid.uuid4() + + storage = settings.PODCAST_STORAGE + if storage == "s3": + key = f"{settings.S3_PREFIX}{pod_id}.mp3" + audio_path = store_audio_s3(audio_bytes, key) + else: + filename = f"{pod_id}.mp3" + audio_path = store_audio_local(audio_bytes, filename) + storage = "local" + + podcast = Podcast( + id=pod_id, + course_id=course_id, + title=title, + transcript=transcript, + audio_path=audio_path, + storage_backend=storage, + ) + session.add(podcast) + session.commit() + session.refresh(podcast) + logger.info( + "[PODCAST] Generated | id=%s | storage=%s | audio_path=%s", + str(podcast.id), + storage, + audio_path, + ) + return podcast + + +def _parse_dialog_segments(transcript: str) -> List[Tuple[str, str]]: + segments: List[Tuple[str, str]] = [] + current_speaker: Optional[str] = None + current_text: List[str] = [] + for raw in transcript.splitlines(): + line = raw.strip() + if not line: + continue + lower = line.lower() + spk = None + if lower.startswith("teacher:"): + spk = "teacher" + content = line.split(":", 1)[1].strip() + elif lower.startswith("student:"): + spk = "student" + content = line.split(":", 1)[1].strip() + else: + content = line + if spk is not None: + if current_speaker is not None and current_text: + segments.append((current_speaker, " ".join(current_text).strip())) + current_speaker = spk + current_text = [content] + else: + if current_speaker is None: + current_speaker = "teacher" + current_text.append(content) + if current_speaker is not None and current_text: + segments.append((current_speaker, " ".join(current_text).strip())) + return [(s, t) for s, t in segments if t] + + +async def _tts_to_temp_file(text: str, voice: str) -> str: + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp: + 
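+        # delete=False: the path must outlive this context manager; callers remove the file once its bytes are consumed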
tmp_path = tmp.name + try: + safe_voice = _sanitize_voice(voice, settings.PODCAST_TEACHER_VOICE) + async with async_openai_client.audio.speech.with_streaming_response.create( + model="gpt-4o-mini-tts", + voice=safe_voice, + input=text, + ) as response: + await response.stream_to_file(tmp_path) + return tmp_path + except Exception as e: + logger.exception("[PODCAST] TTS segment failed (voice=%s): %s", voice, e) + try: + os.remove(tmp_path) + except Exception: + pass + raise HTTPException(status_code=500, detail=f"TTS segment failed: {e}") + + +async def _tts_to_bytes(text: str, voice: str) -> bytes: + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp: + tmp_path = tmp.name + try: + safe_voice = _sanitize_voice(voice, settings.PODCAST_TEACHER_VOICE) + async with async_openai_client.audio.speech.with_streaming_response.create( + model="gpt-4o-mini-tts", + voice=safe_voice, + input=text, + ) as response: + await response.stream_to_file(tmp_path) + with open(tmp_path, 'rb') as f: + return f.read() + finally: + try: + os.remove(tmp_path) + except Exception: + pass + + +def _concat_files(paths: List[str]) -> bytes: + if not paths: + return b"" + # Naive byte concatenation works for many MP3 players and is acceptable for MVP + chunks: List[bytes] = [] + for p in paths: + try: + with open(p, 'rb') as f: + chunks.append(f.read()) + except Exception as e: + logger.exception("[PODCAST] Failed reading segment %s: %s", p, e) + return b"".join(chunks) diff --git a/backend/app/services/rag_service.py b/backend/app/services/rag_service.py index 1de4bbf..626d9b8 100644 --- a/backend/app/services/rag_service.py +++ b/backend/app/services/rag_service.py @@ -27,9 +27,10 @@ async def get_question_embedding(question: str) -> List[float]: async def retrieve_relevant_context( - question_embedding: List[float], + question_embedding: List[float], course_id: uuid.UUID, - top_k: int = 5 + top_k: int = 5, + document_ids: Optional[List[uuid.UUID]] = None, ) -> Optional[str]: """ Retrieve relevant context from course documents using vector similarity @@ -58,9 +59,12 @@ async def retrieve_relevant_context( # Query Pinecone for relevant chunks index = pc.Index(index_name) + pine_filter: dict = {"course_id": {"$eq": str(course_id)}} + if document_ids: + pine_filter["document_id"] = {"$in": [str(d) for d in document_ids]} query_result = index.query( vector=question_embedding, - filter={"course_id": {"$eq": str(course_id)}}, + filter=pine_filter, top_k=top_k, include_metadata=True, ) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 5507cbb..4ac0cc3 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "aiofiles>=24.1.0", "tiktoken>=0.5.0", "numpy>=1.24.0", + "boto3>=1.34.0", ] [tool.uv] diff --git a/frontend/src/app/(routes)/(dashboard)/dashboard/courses/[id]/page.tsx b/frontend/src/app/(routes)/(dashboard)/dashboard/courses/[id]/page.tsx index 3b56ed6..b6fafe7 100644 --- a/frontend/src/app/(routes)/(dashboard)/dashboard/courses/[id]/page.tsx +++ b/frontend/src/app/(routes)/(dashboard)/dashboard/courses/[id]/page.tsx @@ -1,3 +1,4 @@ +import PodcastComponent from '@/components/podcast' import dynamic from 'next/dynamic' import {getCourse} from '@/actions/courses' @@ -53,9 +54,7 @@ export default async function Page(props: {params: Promise<{id: string}>}) { -
- Podcast content will be displayed here -
+          <PodcastComponent courseId={id} />
diff --git a/frontend/src/app/api/v1/documents/by-course/[courseId]/route.ts b/frontend/src/app/api/v1/documents/by-course/[courseId]/route.ts new file mode 100644 index 0000000..190f715 --- /dev/null +++ b/frontend/src/app/api/v1/documents/by-course/[courseId]/route.ts @@ -0,0 +1,47 @@ +import { type NextRequest, NextResponse } from 'next/server' +import { API_CONFIG } from '@/lib/api-config' +import { get } from '@/utils' + +interface ContextParams { + params: Promise<{ courseId: string }> +} + +interface ErrorResponse { + detail: string +} + +/** + * Get documents for a course + */ +export async function GET(_req: NextRequest, context: ContextParams): Promise { + try { + const { courseId } = await context.params + const url = API_CONFIG.getBackendUrl(`/api/v1/documents/by-course/${courseId}`) + const headers = await API_CONFIG.getAuthHeaders() + + const res = await fetch(url, { headers, cache: 'no-store' }) + const text = await res.text() + return new NextResponse(text, { + status: res.status, + headers: { 'Content-Type': 'application/json' } + }) + } catch (error) { + // Log error in development for debugging + if (process.env.NODE_ENV === 'development') { + console.error('[Documents API] GET error:', error) + } + + const status: number = get(error as Record, 'response.status', 500) + const body: ErrorResponse = get( + error as Record, + 'response.data.detail', + { detail: 'Internal Server Error' } + ) + return NextResponse.json(body, { status }) + } +} + +export const config = { + runtime: 'nodejs', +} + diff --git a/frontend/src/app/api/v1/podcasts/[courseId]/route.ts b/frontend/src/app/api/v1/podcasts/[courseId]/route.ts new file mode 100644 index 0000000..228ee2f --- /dev/null +++ b/frontend/src/app/api/v1/podcasts/[courseId]/route.ts @@ -0,0 +1,64 @@ +import { type NextRequest, NextResponse } from 'next/server' +import { PodcastService } from '@/lib/podcast-service' +import { get } from '@/utils' + +interface ContextParams { + params: Promise<{ courseId: string }> +} + +interface ErrorResponse { + detail: string +} + +/** + * Get all podcasts for a course + */ +export async function GET(_req: NextRequest, context: ContextParams): Promise { + try { + const { courseId } = await context.params + const data = await PodcastService.getPodcasts(courseId) + return NextResponse.json(data) + } catch (error) { + // Log error in development for debugging + if (process.env.NODE_ENV === 'development') { + console.error('[PodcastService] GET error:', error) + } + + const status: number = get(error as Record, 'response.status', 500) + const body: ErrorResponse = get( + error as Record, + 'response.data.detail', + { detail: 'Internal Server Error' } + ) + return NextResponse.json(body, { status }) + } +} + +/** + * Generate a new podcast for a course + */ +export async function POST(request: NextRequest, context: ContextParams): Promise { + try { + const { courseId } = await context.params + const body = await request.json().catch(() => ({})) + const data = await PodcastService.generatePodcast(courseId, body) + return NextResponse.json(data) + } catch (error) { + // Log error in development for debugging + if (process.env.NODE_ENV === 'development') { + console.error('[PodcastService] POST error:', error) + } + + const status: number = get(error as Record, 'response.status', 500) + const body: ErrorResponse = get( + error as Record, + 'response.data.detail', + { detail: 'Internal Server Error' } + ) + return NextResponse.json(body, { status }) + } +} + +export const config = { + runtime: 'nodejs', 
+} diff --git a/frontend/src/app/api/v1/podcasts/audio/[podcastId]/route.ts b/frontend/src/app/api/v1/podcasts/audio/[podcastId]/route.ts new file mode 100644 index 0000000..d49ed1b --- /dev/null +++ b/frontend/src/app/api/v1/podcasts/audio/[podcastId]/route.ts @@ -0,0 +1,55 @@ +import { type NextRequest, NextResponse } from 'next/server' +import { PodcastService } from '@/lib/podcast-service' +import { get } from '@/utils' + +interface ContextParams { + params: Promise<{ podcastId: string }> +} + +interface ErrorResponse { + detail: string +} + +/** + * Stream audio for a podcast + * Handles both local file streaming and S3 presigned URLs + */ +export async function GET(_req: NextRequest, context: ContextParams): Promise { + try { + const { podcastId } = await context.params + const response = await PodcastService.getAudioStream(podcastId) + + const contentType = response.headers.get('content-type') || 'application/octet-stream' + + if (contentType.includes('application/json')) { + const text = await response.text() + return new NextResponse(text, { + status: response.status, + headers: { 'Content-Type': 'application/json' } + }) + } + + const blob = await response.arrayBuffer() + return new NextResponse(blob, { + status: response.status, + headers: { 'Content-Type': contentType } + }) + } catch (error) { + // Log error in development for debugging + if (process.env.NODE_ENV === 'development') { + console.error('[PodcastService] Audio stream error:', error) + } + + const status: number = get(error as Record, 'response.status', 500) + const body: ErrorResponse = get( + error as Record, + 'response.data.detail', + { detail: 'Internal Server Error' } + ) + return NextResponse.json(body, { status }) + } +} + +export const config = { + runtime: 'nodejs', +} diff --git a/frontend/src/app/api/v1/podcasts/by-id/[podcastId]/route.ts b/frontend/src/app/api/v1/podcasts/by-id/[podcastId]/route.ts new file mode 100644 index 0000000..7bec6e0 --- /dev/null +++ b/frontend/src/app/api/v1/podcasts/by-id/[podcastId]/route.ts @@ -0,0 +1,35 @@ +import { type NextRequest, NextResponse } from 'next/server' +import { PodcastService } from '@/lib/podcast-service' +import { get } from '@/utils' + +interface ContextParams { + params: Promise<{ podcastId: string }> +} + +interface ErrorResponse { + detail: string +} + +/** + * Delete a podcast by ID + */ +export async function DELETE(_req: NextRequest, context: ContextParams): Promise { + try { + const { podcastId } = await context.params + await PodcastService.deletePodcast(podcastId) + return NextResponse.json({ message: 'Podcast deleted' }, { status: 200 }) + } catch (error) { + const status: number = get(error as Record, 'response.status', 500) + const body: ErrorResponse = get( + error as Record, + 'response.data.detail', + { detail: 'Internal Server Error' } + ) + return NextResponse.json(body, { status }) + } +} + +export const config = { + runtime: 'nodejs', +} + diff --git a/frontend/src/components/podcast.tsx b/frontend/src/components/podcast.tsx new file mode 100644 index 0000000..584a69b --- /dev/null +++ b/frontend/src/components/podcast.tsx @@ -0,0 +1,373 @@ +"use client" + +import { useEffect, useRef, useState, useTransition } from 'react' +import { Play, Pause, SkipBack, SkipForward, Volume2, VolumeX, Trash2 } from 'lucide-react' +import { PodcastUI } from '@/lib/podcast-ui' + +type Podcast = { + id: string + course_id: string + title: string + transcript: string + audio_path: string + storage_backend: string +} + +function formatTime(seconds: 
number): string { + if (!isFinite(seconds)) return '0:00' + const m = Math.floor(seconds / 60) + const s = Math.floor(seconds % 60) + return `${m}:${s.toString().padStart(2, '0')}` +} + +function PodcastPlayer({ podcastId, title, transcript, onDeleted }: { podcastId: string; title: string; transcript: string; onDeleted: () => void }) { + const audioRef = useRef(null) + const [src, setSrc] = useState(`/api/v1/podcasts/audio/${podcastId}`) + const [isPlaying, setIsPlaying] = useState(false) + const [currentTime, setCurrentTime] = useState(0) + const [duration, setDuration] = useState(0) + const [volume, setVolume] = useState(1) + const [muted, setMuted] = useState(false) + const [resolving, setResolving] = useState(false) + + // Resolve S3 presigned URL if backend returns JSON + async function resolveAudioSrc() { + try { + setResolving(true) + const res = await fetch(`/api/v1/podcasts/audio/${podcastId}`, { + // Encourage JSON path for S3 presign; local stream will ignore this + headers: { Accept: 'application/json' }, + cache: 'no-store', + }) + const contentType = res.headers.get('content-type') || '' + if (contentType.includes('application/json')) { + const data = await res.json().catch(() => ({} as any)) + if (data?.url) setSrc(data.url) + } else { + // Fallback to route-streaming + setSrc(`/api/v1/podcasts/audio/${podcastId}`) + } + } catch { + setSrc(`/api/v1/podcasts/audio/${podcastId}`) + } finally { + setResolving(false) + } + } + + useEffect(() => { + const a = audioRef.current + if (!a) return + const onTime = () => setCurrentTime(a.currentTime) + const onLoaded = () => setDuration(a.duration || 0) + const onEnded = () => setIsPlaying(false) + a.addEventListener('timeupdate', onTime) + a.addEventListener('loadedmetadata', onLoaded) + a.addEventListener('ended', onEnded) + return () => { + a.removeEventListener('timeupdate', onTime) + a.removeEventListener('loadedmetadata', onLoaded) + a.removeEventListener('ended', onEnded) + } + }, []) + + const togglePlay = async () => { + if (!audioRef.current) return + if (!src || src.endsWith(`/api/v1/podcasts/audio/${podcastId}`)) { + // Attempt to resolve S3 URL just-in-time + await resolveAudioSrc() + } + if (isPlaying) { + audioRef.current.pause() + setIsPlaying(false) + } else { + try { + await audioRef.current.play() + setIsPlaying(true) + } catch { + // try once more after resolution + await resolveAudioSrc() + await audioRef.current.play() + setIsPlaying(true) + } + } + } + + const seek = (value: number) => { + if (!audioRef.current) return + audioRef.current.currentTime = value + setCurrentTime(value) + } + + const step = (delta: number) => { + if (!audioRef.current) return + const next = Math.max(0, Math.min((audioRef.current.duration || 0), audioRef.current.currentTime + delta)) + audioRef.current.currentTime = next + setCurrentTime(next) + } + + const toggleMute = () => { + if (!audioRef.current) return + audioRef.current.muted = !audioRef.current.muted + setMuted(audioRef.current.muted) + } + + const changeVolume = (v: number) => { + if (!audioRef.current) return + audioRef.current.volume = v + setVolume(v) + if (v > 0 && audioRef.current.muted) { + audioRef.current.muted = false + setMuted(false) + } + } + + return ( +
+
+
+
{title}
+
+
+ + + + +
+
+ + changeVolume(parseFloat(e.target.value))} + className="w-full accent-cyan-600" + /> +
+
+ +
+ {formatTime(currentTime)} + seek(parseFloat(e.target.value))} + className="w-full accent-cyan-600" + /> + {formatTime(duration)} +
+ +
+ Transcript +
{transcript}
+
+ + {/* hidden audio element */} +
+ ) +} + +type DocItem = { id: string; title?: string; filename?: string; status?: string } + +export default function PodcastComponent({ courseId }: { courseId: string }) { + const [podcasts, setPodcasts] = useState([]) + const [title, setTitle] = useState('') + const [isPending, startTransition] = useTransition() + const [teacherVoice, setTeacherVoice] = useState('coral') + const [studentVoice, setStudentVoice] = useState('alloy') + const [mode, setMode] = useState<'dialogue' | 'presentation'>('dialogue') + const [topics, setTopics] = useState('') + const [documents, setDocuments] = useState([]) + const [selectedDocs, setSelectedDocs] = useState([]) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + + async function fetchList() { + try { + setLoading(true) + const json = await PodcastUI.list(courseId) + setPodcasts(json.data ?? []) + setError(null) + } catch (e) { + setError((e as Error).message) + } finally { + setLoading(false) + } + } + + useEffect(() => { + fetchList() + ;(async () => { + try { + const r = await fetch(`/api/v1/documents/by-course/${courseId}`, { cache: 'no-store' }) + const j = await r.json() + setDocuments(Array.isArray(j) ? j : []) + } catch { + setDocuments([]) + } + })() + }, [courseId]) + + function handleGenerate() { + startTransition(async () => { + setError(null) + try { + if (!title.trim()) { + setError('Title is required') + return + } + await PodcastUI.generate(courseId, { + title, + mode, + topics: topics || undefined, + teacher_voice: mode === 'dialogue' ? teacherVoice : undefined, + student_voice: mode === 'dialogue' ? studentVoice : undefined, + narrator_voice: mode === 'presentation' ? teacherVoice : undefined, + document_ids: selectedDocs.length ? selectedDocs : undefined, + }) + await fetchList() + + // Clear form fields after successful generation + setTitle('') + setTopics('') + setSelectedDocs([]) + } catch (e) { + setError((e as Error).message) + } + }) + } + + return ( +
+
+ setTitle(e.target.value)} + placeholder="Podcast title" + /> + + {mode === 'dialogue' ? ( + <> + + + + ) : ( + + )} + +
+
+ +