diff --git a/backend/src/agents/retriever_typing.py b/backend/src/agents/retriever_typing.py index f8c7d5db..73ab57fb 100644 --- a/backend/src/agents/retriever_typing.py +++ b/backend/src/agents/retriever_typing.py @@ -9,6 +9,7 @@ class AgentState(TypedDict): tools: list[str] sources: Annotated[list[str], add_messages] urls: Annotated[list[str], add_messages] + context_list: Annotated[list[str], add_messages] chat_history: str agent_type: list[str] mcp_response: Annotated[list[AnyMessage], add_messages] diff --git a/backend/src/api/routers/helpers.py b/backend/src/api/routers/helpers.py index 73c2c983..3d537835 100644 --- a/backend/src/api/routers/helpers.py +++ b/backend/src/api/routers/helpers.py @@ -11,7 +11,12 @@ if not GOOGLE_API_KEY: raise RuntimeError("GOOGLE_API_KEY is not set") -model = "gemini-2.0-flash" +GEMINI_MODEL_MAP = { + "2.0_flash": "gemini-2.0-flash", + "2.5_flash": "gemini-2.5-flash", + "2.5_pro": "gemini-2.5-pro", +} +model = GEMINI_MODEL_MAP.get(os.getenv("GOOGLE_GEMINI", "2.0_flash"), "gemini-2.0-flash") client = OpenAI( base_url="https://generativelanguage.googleapis.com/v1beta/openai/", api_key=GOOGLE_API_KEY, diff --git a/backend/src/prompts/prompt_templates.py b/backend/src/prompts/prompt_templates.py index 0cd436e4..156628d7 100644 --- a/backend/src/prompts/prompt_templates.py +++ b/backend/src/prompts/prompt_templates.py @@ -11,7 +11,7 @@ You must not ask the user to refer to the context in any part of your answer. You must not ask the user to refer to a link that is not a part of your answer. -If there is nothing in the context relevant to the question, simply say "Sorry its not avaiable in my knowledge base." +If there is nothing in the context relevant to the question, simply say "Sorry, it's not available in my knowledge base." Do not try to make up an answer. Anything between the following `context` html blocks is retrieved from a knowledge bank, not part of the conversation with the user. diff --git a/backend/src/tools/process_pdf.py b/backend/src/tools/process_pdf.py index d5cfeac3..b2c17802 100644 --- a/backend/src/tools/process_pdf.py +++ b/backend/src/tools/process_pdf.py @@ -31,6 +31,7 @@ def process_pdf_docs(file_path: str) -> list[Document]: documents = loader.load_and_split(text_splitter=text_splitter) except PdfStreamError: logging.error(f"Error processing PDF: {file_path} is corrupted or incomplete.") + return [] for doc in documents: try: diff --git a/backend/src/vectorstores/faiss.py b/backend/src/vectorstores/faiss.py index d50dd3de..b1accae5 100644 --- a/backend/src/vectorstores/faiss.py +++ b/backend/src/vectorstores/faiss.py @@ -197,9 +197,12 @@ def add_documents( return None def get_db_path(self) -> str: + env_path = os.getenv("FAISS_DB_PATH") + if env_path: + return os.path.abspath(env_path) cur_path = os.path.abspath(__file__) path = os.path.join(cur_path, "../../../", "faiss_db") - path = os.path.abspath(path) # Ensure proper parent directory + path = os.path.abspath(path) return path def save_db(self, name: str) -> None: diff --git a/backend/tests/test_faiss_vectorstore.py b/backend/tests/test_faiss_vectorstore.py index 5202c83c..d190e5d9 100644 --- a/backend/tests/test_faiss_vectorstore.py +++ b/backend/tests/test_faiss_vectorstore.py @@ -184,7 +184,7 @@ def test_add_md_docs_invalid_folder_paths(self): db.add_md_docs(folder_paths="not_a_list") def test_get_db_path(self): - """Test get_db_path returns correct path.""" + """Test get_db_path returns correct default path when env var is unset.""" with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: mock_hf.return_value = Mock() @@ -192,10 +192,25 @@ def test_get_db_path(self): embeddings_type="HF", embeddings_model_name="test-model" ) - path = db.get_db_path() + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("FAISS_DB_PATH", None) + path = db.get_db_path() assert path.endswith("faiss_db") assert os.path.isabs(path) + def test_get_db_path_from_env(self): + """Test get_db_path respects FAISS_DB_PATH env var.""" + with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: + mock_hf.return_value = Mock() + + db = FAISSVectorDatabase( + embeddings_type="HF", embeddings_model_name="test-model" + ) + + with patch.dict(os.environ, {"FAISS_DB_PATH": "/tmp/my_faiss"}): + path = db.get_db_path() + assert path == "/tmp/my_faiss" + def test_save_db_without_documents_raises_error(self): """Test save_db raises error when no documents in database.""" with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf: