From 7965f1b02d099b9bf1efde47c6dfbb92f81c52c1 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Thu, 19 Mar 2026 11:17:26 +0200 Subject: [PATCH 1/2] fix(knowledge): lazy-load heavy deps in knowledge/__init__.py (#1514) Importing knowledge.pipeline.* no longer triggers the full dependency chain (redis, llama_index, chromadb). The KnowledgeBase class and factory functions are moved to knowledge/_composed.py and loaded on first access via module-level __getattr__. This allows pipeline unit tests to run in dev envs without the full runtime dependency stack. --- autobot-backend/knowledge/__init__.py | 254 ++++++------------------- autobot-backend/knowledge/_composed.py | 96 ++++++++++ 2 files changed, 156 insertions(+), 194 deletions(-) create mode 100644 autobot-backend/knowledge/_composed.py diff --git a/autobot-backend/knowledge/__init__.py b/autobot-backend/knowledge/__init__.py index c210841b3..0b230f2be 100644 --- a/autobot-backend/knowledge/__init__.py +++ b/autobot-backend/knowledge/__init__.py @@ -34,206 +34,17 @@ # Store fact result = await kb.store_fact("Content", {"category": "general"}) -""" - -import asyncio -import logging -import threading -from typing import Optional - -from knowledge.base import KnowledgeBaseCore -from knowledge.bulk import BulkOperationsMixin -from knowledge.categories import CategoriesMixin -from knowledge.collections import CollectionsMixin -from knowledge.documents import DocumentsMixin -from knowledge.facts import FactsMixin -from knowledge.index import IndexMixin -from knowledge.metadata import MetadataMixin -from knowledge.relations import RelationsMixin -from knowledge.search import SearchMixin -from knowledge.stats import StatsMixin -from knowledge.suggestions import SuggestionsMixin -from knowledge.tags import TagsMixin -from knowledge.versioning import VersioningMixin - -logger = logging.getLogger(__name__) - - -class KnowledgeBase( - KnowledgeBaseCore, - StatsMixin, - IndexMixin, - SearchMixin, - FactsMixin, - DocumentsMixin, - TagsMixin, - CategoriesMixin, - CollectionsMixin, - SuggestionsMixin, - MetadataMixin, - VersioningMixin, - BulkOperationsMixin, - RelationsMixin, -): - """ - Unified Knowledge Base implementation. - - This class composes all knowledge base functionality through multiple mixins, - providing a complete API for: - - Fact storage and retrieval - - Semantic and keyword search - - Document processing - - Tag management - - Bulk operations - - Statistics and monitoring - - Index management - - The class uses Method Resolution Order (MRO) to properly inherit from all mixins, - with KnowledgeBaseCore providing the base initialization and configuration. - - Example: - kb = KnowledgeBase() - await kb.initialize() - - # Store a fact - result = await kb.store_fact( - "Python uses indentation for blocks", - metadata={"category": "programming", "tags": ["python", "syntax"]} - ) - - # Search - results = await kb.search("Python syntax", top_k=5) - - # Get stats - stats = await kb.get_stats() - """ - - def __init__(self): - """ - Initialize the composed knowledge base. - - This calls the base __init__ from KnowledgeBaseCore which sets up - all instance variables that are shared across mixins. - """ - super().__init__() - logger.debug("KnowledgeBase instance created (composed from 14 mixins)") - - async def initialize(self) -> bool: - """ - Initialize the knowledge base asynchronously. - - This is the main initialization method that must be called after construction. - It delegates to KnowledgeBaseCore.initialize() which handles: - - Redis connection setup - - LlamaIndex configuration - - ChromaDB vector store initialization - - Stats counter initialization - - Returns: - bool: True if initialization succeeds, False otherwise - - Example: - kb = KnowledgeBase() - success = await kb.initialize() - if success: - logger.info("Knowledge base ready") - """ - # Call the base class initialize which sets up everything - success = await super().initialize() - - if success: - # Additional initialization can go here if needed - # For now, stats initialization is handled in KnowledgeBaseCore - await self._initialize_stats_counters() - - return success - - -# ============================================================================ -# FACTORY FUNCTION - Preferred way to get KnowledgeBase instance -# ============================================================================ - -_knowledge_base_instance: Optional[KnowledgeBase] = None -_initialization_lock = asyncio.Lock() -_reset_lock = threading.Lock() # Thread-safe reset (Issue #613) - - -async def get_knowledge_base(force_new: bool = False) -> KnowledgeBase: - """ - Get or create the singleton knowledge base instance (async factory). - - This is the preferred way to obtain a knowledge base instance. It ensures - that only one instance exists (singleton pattern) and that it's properly - initialized before being returned. - - Args: - force_new: If True, create a new instance even if one exists - - Returns: - KnowledgeBase: Fully initialized knowledge base instance - - Raises: - RuntimeError: If initialization fails - - Example: - # Get the knowledge base (will initialize on first call) - kb = await get_knowledge_base() - - # Now ready to use - results = await kb.search("machine learning") - """ - global _knowledge_base_instance - - async with _initialization_lock: - if force_new or _knowledge_base_instance is None: - logger.info("Creating new KnowledgeBase instance...") - kb = KnowledgeBase() - - # Initialize asynchronously - success = await kb.initialize() - - if not success: - raise RuntimeError("Failed to initialize knowledge base") - - _knowledge_base_instance = kb - logger.info("KnowledgeBase singleton instance created and initialized") - - return _knowledge_base_instance - -def reset_knowledge_base() -> None: - """ - Reset the singleton knowledge base instance (thread-safe). - - This is primarily useful for testing or when you need to force - reinitialization of the knowledge base. - - Note: This does not close existing connections. Call kb.close() first - if you need to properly cleanup resources. - - Issue #613: Uses thread-safe locking to prevent race conditions. - - Example: - kb = await get_knowledge_base() - await kb.close() # Cleanup resources - reset_knowledge_base() # Reset singleton - kb = await get_knowledge_base() # Get fresh instance - """ - global _knowledge_base_instance - with _reset_lock: - _knowledge_base_instance = None - logger.info("KnowledgeBase singleton instance reset") - - -# ============================================================================ -# EXPORTS -# ============================================================================ +Lazy Loading (#1514): + Importing ``knowledge.pipeline.*`` no longer triggers the full + dependency chain (redis, llama_index, chromadb). Heavy classes are + loaded on first access via ``__getattr__``. +""" __all__ = [ "KnowledgeBase", "get_knowledge_base", "reset_knowledge_base", - # Also export individual mixins for advanced use cases "KnowledgeBaseCore", "StatsMixin", "IndexMixin", @@ -249,3 +60,58 @@ def reset_knowledge_base() -> None: "BulkOperationsMixin", "RelationsMixin", ] + + +def __getattr__(name: str): + """Lazy-load heavy knowledge base classes on first access (#1514). + + This avoids pulling redis, llama_index, and chromadb when only + ``knowledge.pipeline.*`` subpackages are imported. + """ + if name not in __all__: + raise AttributeError(f"module 'knowledge' has no attribute {name!r}") + + from knowledge._composed import ( # noqa: F811 + BulkOperationsMixin, + CategoriesMixin, + CollectionsMixin, + DocumentsMixin, + FactsMixin, + IndexMixin, + KnowledgeBase, + KnowledgeBaseCore, + MetadataMixin, + RelationsMixin, + SearchMixin, + StatsMixin, + SuggestionsMixin, + TagsMixin, + VersioningMixin, + get_knowledge_base, + reset_knowledge_base, + ) + + # Populate module globals so subsequent accesses skip __getattr__ + globals().update( + { + "KnowledgeBase": KnowledgeBase, + "get_knowledge_base": get_knowledge_base, + "reset_knowledge_base": reset_knowledge_base, + "KnowledgeBaseCore": KnowledgeBaseCore, + "StatsMixin": StatsMixin, + "IndexMixin": IndexMixin, + "SearchMixin": SearchMixin, + "FactsMixin": FactsMixin, + "DocumentsMixin": DocumentsMixin, + "TagsMixin": TagsMixin, + "CategoriesMixin": CategoriesMixin, + "CollectionsMixin": CollectionsMixin, + "SuggestionsMixin": SuggestionsMixin, + "MetadataMixin": MetadataMixin, + "VersioningMixin": VersioningMixin, + "BulkOperationsMixin": BulkOperationsMixin, + "RelationsMixin": RelationsMixin, + } + ) + + return globals()[name] diff --git a/autobot-backend/knowledge/_composed.py b/autobot-backend/knowledge/_composed.py new file mode 100644 index 000000000..c8a3bb28b --- /dev/null +++ b/autobot-backend/knowledge/_composed.py @@ -0,0 +1,96 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Composed KnowledgeBase class and singleton factory (#1514). + +This module holds the heavy imports (redis, llama_index, chromadb) that +are triggered by the mixin chain. ``knowledge/__init__.py`` lazily +imports from here so that ``knowledge.pipeline.*`` can be used without +pulling in the full dependency tree. +""" + +import asyncio +import logging +import threading +from typing import Optional + +from knowledge.base import KnowledgeBaseCore +from knowledge.bulk import BulkOperationsMixin +from knowledge.categories import CategoriesMixin +from knowledge.collections import CollectionsMixin +from knowledge.documents import DocumentsMixin +from knowledge.facts import FactsMixin +from knowledge.index import IndexMixin +from knowledge.metadata import MetadataMixin +from knowledge.relations import RelationsMixin +from knowledge.search import SearchMixin +from knowledge.stats import StatsMixin +from knowledge.suggestions import SuggestionsMixin +from knowledge.tags import TagsMixin +from knowledge.versioning import VersioningMixin + +logger = logging.getLogger(__name__) + + +class KnowledgeBase( + KnowledgeBaseCore, + StatsMixin, + IndexMixin, + SearchMixin, + FactsMixin, + DocumentsMixin, + TagsMixin, + CategoriesMixin, + CollectionsMixin, + SuggestionsMixin, + MetadataMixin, + VersioningMixin, + BulkOperationsMixin, + RelationsMixin, +): + """Unified Knowledge Base composed from 14 specialised mixins.""" + + def __init__(self): + super().__init__() + logger.debug("KnowledgeBase instance created (composed from 14 mixins)") + + async def initialize(self) -> bool: + """Initialize the knowledge base asynchronously.""" + success = await super().initialize() + if success: + await self._initialize_stats_counters() + return success + + +# Singleton factory +_knowledge_base_instance: Optional[KnowledgeBase] = None +_initialization_lock = asyncio.Lock() +_reset_lock = threading.Lock() + + +async def get_knowledge_base( + force_new: bool = False, +) -> KnowledgeBase: + """Get or create the singleton KnowledgeBase instance.""" + global _knowledge_base_instance + + async with _initialization_lock: + if force_new or _knowledge_base_instance is None: + logger.info("Creating new KnowledgeBase instance...") + kb = KnowledgeBase() + success = await kb.initialize() + if not success: + raise RuntimeError("Failed to initialize knowledge base") + _knowledge_base_instance = kb + logger.info("KnowledgeBase singleton instance created and initialized") + + return _knowledge_base_instance + + +def reset_knowledge_base() -> None: + """Reset the singleton (thread-safe, Issue #613).""" + global _knowledge_base_instance + with _reset_lock: + _knowledge_base_instance = None + logger.info("KnowledgeBase singleton instance reset") From 02733486dd2e47b85213b661a2eff037037a1d58 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Thu, 19 Mar 2026 18:34:11 +0200 Subject: [PATCH 2/2] refactor(knowledge): single source of truth for lazy exports (#1514) Address code review: eliminate triple redundancy of export names. __all__ is now the sole source of truth. Each name is resolved via getattr(_composed, name) and cached in module globals. Also clarified the docstring about what is being deferred. --- autobot-backend/knowledge/__init__.py | 66 ++++++++------------------- 1 file changed, 18 insertions(+), 48 deletions(-) diff --git a/autobot-backend/knowledge/__init__.py b/autobot-backend/knowledge/__init__.py index 0b230f2be..d5431755f 100644 --- a/autobot-backend/knowledge/__init__.py +++ b/autobot-backend/knowledge/__init__.py @@ -36,9 +36,11 @@ result = await kb.store_fact("Content", {"category": "general"}) Lazy Loading (#1514): - Importing ``knowledge.pipeline.*`` no longer triggers the full - dependency chain (redis, llama_index, chromadb). Heavy classes are - loaded on first access via ``__getattr__``. + The ``knowledge/__init__.py`` module body no longer eagerly imports + ``knowledge.base`` and its mixin siblings (which pull in redis, + llama_index, and chromadb). All heavy classes are deferred to + ``knowledge/_composed.py`` and loaded on first attribute access + via PEP 562 ``__getattr__``. """ __all__ = [ @@ -65,53 +67,21 @@ def __getattr__(name: str): """Lazy-load heavy knowledge base classes on first access (#1514). - This avoids pulling redis, llama_index, and chromadb when only - ``knowledge.pipeline.*`` subpackages are imported. + ``__all__`` is the single source of truth for exported names. + Each name is resolved via ``getattr(knowledge._composed, name)`` + and cached in module globals so subsequent accesses skip this + function. + + Note: if a sibling mixin module (knowledge/base.py, etc.) ever + adds a top-level ``from knowledge import X``, it will create a + circular import through _composed.py. Keep such imports inside + function bodies. """ if name not in __all__: raise AttributeError(f"module 'knowledge' has no attribute {name!r}") - from knowledge._composed import ( # noqa: F811 - BulkOperationsMixin, - CategoriesMixin, - CollectionsMixin, - DocumentsMixin, - FactsMixin, - IndexMixin, - KnowledgeBase, - KnowledgeBaseCore, - MetadataMixin, - RelationsMixin, - SearchMixin, - StatsMixin, - SuggestionsMixin, - TagsMixin, - VersioningMixin, - get_knowledge_base, - reset_knowledge_base, - ) - - # Populate module globals so subsequent accesses skip __getattr__ - globals().update( - { - "KnowledgeBase": KnowledgeBase, - "get_knowledge_base": get_knowledge_base, - "reset_knowledge_base": reset_knowledge_base, - "KnowledgeBaseCore": KnowledgeBaseCore, - "StatsMixin": StatsMixin, - "IndexMixin": IndexMixin, - "SearchMixin": SearchMixin, - "FactsMixin": FactsMixin, - "DocumentsMixin": DocumentsMixin, - "TagsMixin": TagsMixin, - "CategoriesMixin": CategoriesMixin, - "CollectionsMixin": CollectionsMixin, - "SuggestionsMixin": SuggestionsMixin, - "MetadataMixin": MetadataMixin, - "VersioningMixin": VersioningMixin, - "BulkOperationsMixin": BulkOperationsMixin, - "RelationsMixin": RelationsMixin, - } - ) + from knowledge import _composed - return globals()[name] + value = getattr(_composed, name) + globals()[name] = value + return value