From 4f7ed762ce55f5794961c94efa48d72b8da18201 Mon Sep 17 00:00:00 2001 From: sanjibani Date: Tue, 31 Mar 2026 23:51:00 +0530 Subject: [PATCH] Fix 5 bugs found during code review - Fix UnboundLocalError crash in process_pdf_docs when PDF is corrupted: return empty list on PdfStreamError instead of referencing unassigned `documents` variable. - Honor FAISS_DB_PATH env var in FAISSVectorDatabase.get_db_path() instead of always using a hardcoded relative path. - Add missing `context_list` field to AgentState TypedDict so retriever tool output is no longer silently dropped. - Read GOOGLE_GEMINI env var in helpers.py instead of hardcoding gemini-2.0-flash, keeping model choice consistent with the main conversations endpoint. - Fix typo "avaiable" -> "available" in the summarise prompt template. Co-Authored-By: Claude Opus 4.6 --- backend/src/agents/retriever_typing.py | 1 + backend/src/api/routers/helpers.py | 7 ++++++- backend/src/prompts/prompt_templates.py | 2 +- backend/src/tools/process_pdf.py | 1 + backend/src/vectorstores/faiss.py | 5 ++++- backend/tests/test_faiss_vectorstore.py | 19 +++++++++++++++++-- 6 files changed, 30 insertions(+), 5 deletions(-) diff --git a/backend/src/agents/retriever_typing.py b/backend/src/agents/retriever_typing.py index f8c7d5db..73ab57fb 100644 --- a/backend/src/agents/retriever_typing.py +++ b/backend/src/agents/retriever_typing.py @@ -9,6 +9,7 @@ class AgentState(TypedDict): tools: list[str] sources: Annotated[list[str], add_messages] urls: Annotated[list[str], add_messages] + context_list: Annotated[list[str], add_messages] chat_history: str agent_type: list[str] mcp_response: Annotated[list[AnyMessage], add_messages] diff --git a/backend/src/api/routers/helpers.py b/backend/src/api/routers/helpers.py index 73c2c983..3d537835 100644 --- a/backend/src/api/routers/helpers.py +++ b/backend/src/api/routers/helpers.py @@ -11,7 +11,12 @@ if not GOOGLE_API_KEY: raise RuntimeError("GOOGLE_API_KEY is not set") -model = "gemini-2.0-flash" +GEMINI_MODEL_MAP = { + "2.0_flash": "gemini-2.0-flash", + "2.5_flash": "gemini-2.5-flash", + "2.5_pro": "gemini-2.5-pro", +} +model = GEMINI_MODEL_MAP.get(os.getenv("GOOGLE_GEMINI", "2.0_flash"), "gemini-2.0-flash") client = OpenAI( base_url="https://generativelanguage.googleapis.com/v1beta/openai/", api_key=GOOGLE_API_KEY, diff --git a/backend/src/prompts/prompt_templates.py b/backend/src/prompts/prompt_templates.py index 0cd436e4..156628d7 100644 --- a/backend/src/prompts/prompt_templates.py +++ b/backend/src/prompts/prompt_templates.py @@ -11,7 +11,7 @@ You must not ask the user to refer to the context in any part of your answer. You must not ask the user to refer to a link that is not a part of your answer. -If there is nothing in the context relevant to the question, simply say "Sorry its not avaiable in my knowledge base." +If there is nothing in the context relevant to the question, simply say "Sorry, it's not available in my knowledge base." Do not try to make up an answer. Anything between the following `context` html blocks is retrieved from a knowledge bank, not part of the conversation with the user. diff --git a/backend/src/tools/process_pdf.py b/backend/src/tools/process_pdf.py index d5cfeac3..b2c17802 100644 --- a/backend/src/tools/process_pdf.py +++ b/backend/src/tools/process_pdf.py @@ -31,6 +31,7 @@ def process_pdf_docs(file_path: str) -> list[Document]: documents = loader.load_and_split(text_splitter=text_splitter) except PdfStreamError: logging.error(f"Error processing PDF: {file_path} is corrupted or incomplete.") + return [] for doc in documents: try: diff --git a/backend/src/vectorstores/faiss.py b/backend/src/vectorstores/faiss.py index d50dd3de..b1accae5 100644 --- a/backend/src/vectorstores/faiss.py +++ b/backend/src/vectorstores/faiss.py @@ -197,9 +197,12 @@ def add_documents( return None def get_db_path(self) -> str: + env_path = os.getenv("FAISS_DB_PATH") + if env_path: + return os.path.abspath(env_path) cur_path = os.path.abspath(__file__) path = os.path.join(cur_path, "../../../", "faiss_db") - path = os.path.abspath(path) # Ensure proper parent directory + path = os.path.abspath(path) return path def save_db(self, name: str) -> None: diff --git a/backend/tests/test_faiss_vectorstore.py b/backend/tests/test_faiss_vectorstore.py index 5202c83c..d190e5d9 100644 --- a/backend/tests/test_faiss_vectorstore.py +++ b/backend/tests/test_faiss_vectorstore.py @@ -184,7 +184,7 @@ def test_add_md_docs_invalid_folder_paths(self): db.add_md_docs(folder_paths="not_a_list") def test_get_db_path(self): - """Test get_db_path returns correct path.""" + """Test get_db_path returns correct default path when env var is unset.""" with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: mock_hf.return_value = Mock() @@ -192,10 +192,25 @@ def test_get_db_path(self): embeddings_type="HF", embeddings_model_name="test-model" ) - path = db.get_db_path() + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("FAISS_DB_PATH", None) + path = db.get_db_path() assert path.endswith("faiss_db") assert os.path.isabs(path) + def test_get_db_path_from_env(self): + """Test get_db_path respects FAISS_DB_PATH env var.""" + with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: + mock_hf.return_value = Mock() + + db = FAISSVectorDatabase( + embeddings_type="HF", embeddings_model_name="test-model" + ) + + with patch.dict(os.environ, {"FAISS_DB_PATH": "/tmp/my_faiss"}): + path = db.get_db_path() + assert path == "/tmp/my_faiss" + def test_save_db_without_documents_raises_error(self): """Test save_db raises error when no documents in database.""" with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: