From f696f0fe60fa85c63b3c776160ef543e28d60b18 Mon Sep 17 00:00:00 2001 From: EwanTauran Date: Mon, 1 Dec 2025 16:49:43 -0800 Subject: [PATCH 1/4] feat: add Airweave tools to crewai-tools - Introduced AirweaveAdvancedSearchTool and AirweaveSearchTool to the project. - Updated pyproject.toml to include airweave-sdk dependency for enhanced functionality. --- lib/crewai-tools/pyproject.toml | 3 + lib/crewai-tools/src/crewai_tools/__init__.py | 6 + .../tools/airweave_tool/README.md | 361 ++++++++++++++++++ .../tools/airweave_tool/__init__.py | 11 + .../airweave_advanced_search_tool.py | 358 +++++++++++++++++ .../airweave_tool/airweave_search_tool.py | 311 +++++++++++++++ .../tests/tools/airweave_tool_test.py | 333 ++++++++++++++++ 7 files changed, 1383 insertions(+) create mode 100644 lib/crewai-tools/src/crewai_tools/tools/airweave_tool/README.md create mode 100644 lib/crewai-tools/src/crewai_tools/tools/airweave_tool/__init__.py create mode 100644 lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py create mode 100644 lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py create mode 100644 lib/crewai-tools/tests/tools/airweave_tool_test.py diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 60853ed744..1b88d59bcd 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -95,6 +95,9 @@ qdrant-client = [ apify = [ "langchain-apify>=0.1.2,<1.0.0", ] +airweave = [ + "airweave-sdk>=0.1.0", +] databricks-sdk = [ "databricks-sdk>=0.46.0", diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py b/lib/crewai-tools/src/crewai_tools/__init__.py index df69905734..3de0138e45 100644 --- a/lib/crewai-tools/src/crewai_tools/__init__.py +++ b/lib/crewai-tools/src/crewai_tools/__init__.py @@ -8,6 +8,10 @@ from crewai_tools.aws.s3.reader_tool import S3ReaderTool from crewai_tools.aws.s3.writer_tool import S3WriterTool from crewai_tools.tools.ai_mind_tool.ai_mind_tool 
import AIMindTool +from crewai_tools.tools.airweave_tool.airweave_advanced_search_tool import ( + AirweaveAdvancedSearchTool, +) +from crewai_tools.tools.airweave_tool.airweave_search_tool import AirweaveSearchTool from crewai_tools.tools.apify_actors_tool.apify_actors_tool import ApifyActorsTool from crewai_tools.tools.arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool @@ -196,6 +200,8 @@ __all__ = [ "AIMindTool", + "AirweaveAdvancedSearchTool", + "AirweaveSearchTool", "ApifyActorsTool", "ArxivPaperTool", "BedrockInvokeAgentTool", diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/README.md new file mode 100644 index 0000000000..1eaec3bd3f --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/README.md @@ -0,0 +1,361 @@ +# Airweave Search Tools + +Search across all your connected data sources (Stripe, GitHub, Notion, Slack, and 50+ more) using Airweave's unified search API. + +## Installation + +```bash +pip install 'crewai-tools[airweave]' +``` + +Or install the SDK directly: + +```bash +pip install airweave-sdk +``` + +## Setup + +### 1. Get your API key + +Sign up at [https://app.airweave.ai](https://app.airweave.ai) and get your API key. + +### 2. Set environment variable + +```bash +export AIRWEAVE_API_KEY="your_api_key_here" +``` + +### 3. Create a collection and connect data sources + +Through the Airweave dashboard: +1. Create a new collection +2. Add source connections (Stripe, GitHub, Notion, etc.) +3. Wait for initial sync to complete +4. Copy your collection's `readable_id` + +## Tools + +### AirweaveSearchTool + +Basic search tool for straightforward queries. Mirrors the `client.collections.search()` method from the Airweave Python SDK. 
+ +**When to use:** +- Simple searches without filtering +- Quick lookups across all data sources +- When you don't need advanced features + +**Example:** + +```python +from crewai import Agent, Task, Crew +from crewai_tools import AirweaveSearchTool + +# Initialize the tool +search_tool = AirweaveSearchTool( + collection_id="my-collection-id" +) + +# Create an agent with the tool +agent = Agent( + role="Data Analyst", + goal="Find information from connected data sources", + tools=[search_tool], + verbose=True +) + +# Create a task +task = Task( + description="Find all failed payments from the last month", + agent=agent, + expected_output="List of failed payments with customer details" +) + +# Run the crew +crew = Crew(agents=[agent], tasks=[task]) +result = crew.kickoff() +``` + +**Get AI-generated answers:** + +```python +from crewai import Task + +task = Task( + description=""" + What are the most common customer complaints this month? + Use response_type='completion' to get an AI-generated summary. + """, + agent=agent +) +``` + +### AirweaveAdvancedSearchTool + +Advanced search tool with filtering, reranking, and fine-tuned control. Mirrors the `client.collections.search_advanced()` method from the Airweave Python SDK. + +**When to use:** +- Filter by specific data sources +- Prioritize recent results +- Set minimum relevance scores +- Enable AI reranking for better results +- Choose specific search methods (hybrid/neural/keyword) + +**Example:** + +```python +from crewai_tools import AirweaveAdvancedSearchTool + +# Initialize with advanced options +advanced_tool = AirweaveAdvancedSearchTool( + collection_id="my-collection-id" +) + +agent = Agent( + role="Customer Support Analyst", + goal="Find recent customer issues from specific sources", + tools=[advanced_tool] +) + +task = Task( + description=""" + Find customer complaints about billing from Zendesk in the last week. 
+ Use these parameters: + - source_filter: 'Zendesk' + - recency_bias: 0.8 + - enable_reranking: True + - score_threshold: 0.7 + """, + agent=agent +) +``` + +## Parameters + +### AirweaveSearchTool Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | str | Required | Search query to find relevant information | +| `limit` | int | 10 | Maximum number of results (1-100) | +| `response_type` | str | "raw" | "raw" for search results, "completion" for AI answer | +| `recency_bias` | float | 0.0 | Weight for recent results (0.0-1.0) | + +### AirweaveAdvancedSearchTool Parameters + +All basic parameters plus: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `source_filter` | str | None | Filter by specific source (e.g., "Stripe", "GitHub") | +| `score_threshold` | float | None | Minimum similarity score (0.0-1.0) | +| `recency_bias` | float | 0.3 | Weight for recent results (0.0-1.0) | +| `enable_reranking` | bool | True | Enable AI reranking for better relevance | +| `search_method` | str | "hybrid" | "hybrid", "neural", or "keyword" | + +### Tool Configuration (Constructor) + +Both tools accept these configuration parameters: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `collection_id` | str | Required | Your collection's readable ID | +| `base_url` | str | None | Custom API URL for self-hosted instances | +| `max_content_length` | int | 300 | Max characters to show per result | + +## What Can Airweave Search? + +Airweave connects to 50+ data sources including: + +### Finance & Billing +- Stripe +- QuickBooks +- Chargebee + +### Development Tools +- GitHub +- GitLab +- Jira +- Linear + +### Collaboration +- Slack +- Microsoft Teams +- Discord + +### Productivity +- Notion +- Google Drive +- Confluence +- Dropbox + +### CRM & Support +- Salesforce +- HubSpot +- Zendesk +- Intercom + +### And many more... 
+ +[See full list of integrations →](https://docs.airweave.ai/integrations) + +## Features + +✅ **Unified Search** - Search across all data sources with one query +✅ **Semantic Search** - Natural language understanding +✅ **Hybrid Search** - Combines vector and keyword search +✅ **AI Reranking** - Improves result relevance (Advanced) +✅ **AI Answers** - Get generated answers via `response_type="completion"` +✅ **Recency Bias** - Prioritize recent results +✅ **Source Filtering** - Search specific sources only (Advanced) +✅ **Score Threshold** - Filter by relevance (Advanced) +✅ **Incremental Sync** - Data stays automatically updated +✅ **Multi-tenant** - Each user has their own collections + +## Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `AIRWEAVE_API_KEY` | Yes | - | Your Airweave API key | + +## Response Types + +### Raw Results (`response_type="raw"`) + +Returns structured search results with: +- Content snippets +- Similarity scores +- Source information +- Entity IDs +- Creation timestamps +- URLs (when available) + +### AI Completion (`response_type="completion"`) + +Returns an AI-generated natural language answer based on the retrieved documents. Use this when you want: +- Summarized information +- Direct answers to questions +- Synthesized insights from multiple sources + +## Search Methods (Advanced Tool) + +### Hybrid (Default) +Combines semantic (neural) and keyword (BM25) search for best results. + +### Neural +Pure semantic search using embeddings. Best for conceptual queries. + +### Keyword +Traditional keyword search. Best for exact term matching. 
+ +## Use Cases + +### Customer Support Agent +```python +agent = Agent( + role="Customer Support Specialist", + goal="Find and resolve customer issues", + tools=[AirweaveAdvancedSearchTool(collection_id="support-data")], + backstory="Expert at finding relevant customer data across Zendesk, Slack, and email" +) +``` + +### Sales Intelligence Agent +```python +agent = Agent( + role="Sales Intelligence Analyst", + goal="Research accounts and opportunities", + tools=[AirweaveSearchTool(collection_id="sales-data")], + backstory="Analyzes data from Salesforce, HubSpot, and LinkedIn" +) +``` + +### Financial Analysis Agent +```python +agent = Agent( + role="Financial Analyst", + goal="Analyze payment trends and issues", + tools=[AirweaveAdvancedSearchTool(collection_id="finance-data")], + backstory="Tracks payments, invoices, and transactions from Stripe and QuickBooks" +) +``` + +### Technical Documentation Agent +```python +agent = Agent( + role="Documentation Specialist", + goal="Answer technical questions from internal docs", + tools=[AirweaveSearchTool(collection_id="docs-collection")], + backstory="Searches through Notion, Confluence, and GitHub repos" +) +``` + +## Error Handling + +The tools handle errors gracefully and return clear error messages: + +- **Missing API key** - Clear instruction on how to set it +- **Collection not found** - Verifies collection exists +- **No results** - Suggests rephrasing query +- **API errors** - Returns error details for debugging + +## Advanced Filtering Examples + +### Filter by Multiple Sources (Future Enhancement) + +Currently supports single source filtering. 
For multiple sources, create a filter using the SDK types: + +```python +from airweave import Filter, FieldCondition, MatchAny + +# In your agent's task description, specify the filter logic +# The tool currently supports a single source_filter parameter +``` + +### Combine Multiple Filters + +For complex filtering needs beyond a single source, consider using the Airweave SDK directly within a custom tool or making multiple search calls. + +## Best Practices + +1. **Start with Basic Search** - Use `AirweaveSearchTool` for most queries +2. **Use Advanced When Needed** - Switch to `AirweaveAdvancedSearchTool` when you need filtering or reranking +3. **Set Appropriate Limits** - Higher limits for comprehensive searches, lower for quick lookups +4. **Enable Reranking for Complex Queries** - Better results for nuanced questions +5. **Use Recency Bias for Time-Sensitive Data** - Great for support tickets, recent transactions +6. **Choose Response Type Based on Need** - "raw" for structured data, "completion" for answers +7. 
**Filter by Source for Focused Searches** - Reduces noise when you know the source
+
+## Troubleshooting
+
+### "No results found"
+- Check that your collection has data synced
+- Verify sync jobs completed successfully
+- Try broader search terms
+- Lower score_threshold if using advanced search
+
+### "Unable to generate an answer"
+- Ensure you have relevant data in your collection
+- Try response_type="raw" to see what results are available
+- Rephrase your query to be more specific
+
+### Import errors
+```bash
+pip install --upgrade airweave-sdk crewai-tools
+```
+
+## Learn More
+
+- [Airweave Documentation](https://docs.airweave.ai)
+- [API Reference](https://docs.airweave.ai/api-reference)
+- [Python SDK](https://github.com/airweave-ai/python-sdk)
+- [Get API Key](https://app.airweave.ai)
+- [CrewAI Documentation](https://docs.crewai.com)
+
+## Support
+
+- Discord: [Join Airweave Community](https://discord.gg/airweave)
+- Email: support@airweave.ai
+- GitHub Issues: [airweave-ai/python-sdk](https://github.com/airweave-ai/python-sdk/issues)
+
+
diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/__init__.py
new file mode 100644
index 0000000000..5d71c9ce27
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/__init__.py
@@ -0,0 +1,11 @@
+"""Airweave search tools for CrewAI."""
+
+from .airweave_search_tool import AirweaveSearchTool
+from .airweave_advanced_search_tool import AirweaveAdvancedSearchTool
+
+__all__ = [
+    "AirweaveSearchTool",
+    "AirweaveAdvancedSearchTool",
+]
+
+
diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py
new file mode 100644
index 0000000000..d7aa8c0147
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py
@@ -0,0 +1,381 @@
+"""Airweave Advanced Search Tool with filtering and reranking."""
+
+import os
+from typing import Any, Dict, List, Optional, Type
+
+from crewai.tools import BaseTool, EnvVar
+from pydantic import BaseModel, Field
+
+try:
+    from airweave import AirweaveSDK
+except ImportError:
+    # airweave-sdk is an optional extra. Binding the name at module level keeps
+    # this module importable without it and gives tests a stable patch target;
+    # the tool raises a descriptive ImportError when it is constructed.
+    AirweaveSDK = None
+
+# Accepted values; anything else falls back to "raw" / "hybrid".
+_RESPONSE_TYPES = ("raw", "completion")
+_SEARCH_METHODS = ("hybrid", "neural", "keyword")
+
+# Payload keys rendered under each result, with their display labels.
+_METADATA_LABELS = (
+    ("source_name", "Source"),
+    ("entity_id", "Entity ID"),
+    ("created_at", "Created"),
+    ("url", "URL"),
+)
+
+
+class AirweaveAdvancedSearchToolSchema(BaseModel):
+    """Input schema for AirweaveAdvancedSearchTool."""
+
+    query: str = Field(
+        ...,
+        description="The search query to find relevant information"
+    )
+    limit: Optional[int] = Field(
+        default=10,
+        ge=1,
+        le=100,
+        description="Maximum number of results to return (1-100)"
+    )
+    offset: Optional[int] = Field(
+        default=0,
+        ge=0,
+        description="Number of results to skip for pagination"
+    )
+    response_type: Optional[str] = Field(
+        default="raw",
+        description="Response format: 'raw' for search results or 'completion' for AI-generated answer"
+    )
+    source_filter: Optional[str] = Field(
+        default=None,
+        description="Filter by specific source name (e.g., 'Stripe', 'GitHub', 'Slack')"
+    )
+    score_threshold: Optional[float] = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="Minimum similarity score threshold (0.0-1.0)"
+    )
+    recency_bias: Optional[float] = Field(
+        default=0.3,
+        ge=0.0,
+        le=1.0,
+        description="Weight for recent results (0.0=no bias, 1.0=only recency). Default: 0.3"
+    )
+    enable_reranking: Optional[bool] = Field(
+        default=True,
+        description="Enable AI reranking for better relevance"
+    )
+    search_method: Optional[str] = Field(
+        default="hybrid",
+        description="Search method: 'hybrid' (default), 'neural', or 'keyword'"
+    )
+
+
+class AirweaveAdvancedSearchTool(BaseTool):
+    """
+    Advanced search across Airweave collections with filtering and reranking.
+
+    This tool provides advanced search capabilities including:
+    - Source filtering (search only specific data sources)
+    - Recency bias (prioritize recent results)
+    - Score threshold filtering
+    - AI-powered reranking for improved relevance
+    - Query expansion for better recall
+    - Multiple search methods (hybrid, neural, keyword)
+
+    Mirrors the client.collections.search_advanced() method from the Airweave Python SDK.
+    Use this when you need filtering, reranking, or fine-tuned search control.
+    """
+
+    model_config = {"arbitrary_types_allowed": True}
+
+    name: str = "Airweave Advanced Search"
+    description: str = (
+        "Advanced search with filtering and AI enhancements. Use this when you need to: "
+        "filter by specific sources, prioritize recent results, set minimum relevance scores, "
+        "enable AI reranking, or use specific search methods (hybrid/neural/keyword)."
+    )
+    args_schema: Type[BaseModel] = AirweaveAdvancedSearchToolSchema
+
+    # Required configuration
+    collection_id: str = Field(
+        ...,
+        description="The readable ID of the Airweave collection to search"
+    )
+
+    # Optional configuration
+    base_url: Optional[str] = Field(
+        default=None,
+        description="Custom Airweave API base URL"
+    )
+    max_content_length: int = Field(
+        default=300,
+        description="Maximum content length to display per result"
+    )
+
+    # Dependencies
+    package_dependencies: List[str] = ["airweave-sdk"]
+    env_vars: List[EnvVar] = [
+        EnvVar(
+            name="AIRWEAVE_API_KEY",
+            description="API key for Airweave",
+            required=True
+        ),
+    ]
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Validate the environment and create the synchronous SDK client.
+
+        Raises:
+            ImportError: If the ``airweave-sdk`` package is not installed.
+            ValueError: If ``AIRWEAVE_API_KEY`` is not set.
+        """
+        super().__init__(**kwargs)
+
+        if AirweaveSDK is None:
+            raise ImportError(
+                "Missing required package 'airweave-sdk'. Install with:\n"
+                " pip install airweave-sdk\n"
+                "or\n"
+                " pip install 'crewai-tools[airweave]'"
+            )
+
+        self._client = AirweaveSDK(**self._client_kwargs())
+
+    def _client_kwargs(self) -> Dict[str, Any]:
+        """Build the constructor arguments shared by the sync and async clients.
+
+        Raises:
+            ValueError: If ``AIRWEAVE_API_KEY`` is not set.
+        """
+        api_key = os.getenv("AIRWEAVE_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "AIRWEAVE_API_KEY environment variable is required."
+            )
+
+        # The version is only passed along as ``framework_version``, so any
+        # failure to resolve it is deliberately downgraded to "unknown".
+        try:
+            from importlib.metadata import version
+            package_version = version("crewai-tools")
+        except Exception:
+            package_version = "unknown"
+
+        client_kwargs: Dict[str, Any] = {
+            "api_key": api_key,
+            "framework_name": "crewai",
+            "framework_version": package_version,
+        }
+        if self.base_url:
+            client_kwargs["base_url"] = self.base_url
+        return client_kwargs
+
+    def _get_async_client(self) -> Any:
+        """Return the async SDK client, creating it on first use."""
+        if not hasattr(self, "_async_client"):
+            from airweave import AsyncAirweaveSDK
+
+            self._async_client = AsyncAirweaveSDK(**self._client_kwargs())
+        return self._async_client
+
+    def _search_kwargs(
+        self,
+        query: str,
+        limit: int,
+        offset: int,
+        response_type: str,
+        source_filter: Optional[str],
+        score_threshold: Optional[float],
+        recency_bias: float,
+        enable_reranking: bool,
+        search_method: str,
+    ) -> Dict[str, Any]:
+        """Normalize tool arguments into ``search_advanced`` keyword arguments."""
+        # Unrecognized values fall back to the defaults rather than failing.
+        if response_type not in _RESPONSE_TYPES:
+            response_type = "raw"
+        if search_method not in _SEARCH_METHODS:
+            search_method = "hybrid"
+
+        filter_obj = None
+        if source_filter:
+            # Filter types are imported lazily; they are only needed here.
+            from airweave import FieldCondition, Filter, MatchValue
+
+            filter_obj = Filter(
+                must=[
+                    FieldCondition(
+                        key="source_name",
+                        match=MatchValue(value=source_filter)
+                    )
+                ]
+            )
+
+        return {
+            "readable_id": self.collection_id,
+            "query": query,
+            "limit": limit,
+            "offset": offset,
+            "score_threshold": score_threshold,
+            "recency_bias": recency_bias,
+            "enable_reranking": enable_reranking,
+            "search_method": search_method,
+            "filter": filter_obj,
+            "response_type": response_type,
+        }
+
+    def _run(
+        self,
+        query: str,
+        limit: int = 10,
+        offset: int = 0,
+        response_type: str = "raw",
+        source_filter: Optional[str] = None,
+        score_threshold: Optional[float] = None,
+        recency_bias: float = 0.3,
+        enable_reranking: bool = True,
+        search_method: str = "hybrid",
+        **kwargs: Any
+    ) -> str:
+        """Execute an advanced search and return a formatted string.
+
+        Errors raised while searching are caught and returned as a message
+        string instead of propagating.
+        """
+        try:
+            search_kwargs = self._search_kwargs(
+                query,
+                limit,
+                offset,
+                response_type,
+                source_filter,
+                score_threshold,
+                recency_bias,
+                enable_reranking,
+                search_method,
+            )
+            response = self._client.collections.search_advanced(**search_kwargs)
+
+            # Handle completion response
+            if search_kwargs["response_type"] == "completion":
+                if response.completion:
+                    return response.completion
+                return "Unable to generate an answer from available data. Try rephrasing your question."
+
+            # Handle raw results response
+            if response.status == "no_results":
+                return "No results found for your query."
+
+            if response.status == "no_relevant_results":
+                return "Search completed but no sufficiently relevant results were found. Try adjusting filters or threshold."
+
+            return self._format_results(response.results, limit, source_filter)
+
+        except Exception as e:
+            return f"Error performing advanced search: {str(e)}"
+
+    async def _arun(
+        self,
+        query: str,
+        limit: int = 10,
+        offset: int = 0,
+        response_type: str = "raw",
+        source_filter: Optional[str] = None,
+        score_threshold: Optional[float] = None,
+        recency_bias: float = 0.3,
+        enable_reranking: bool = True,
+        search_method: str = "hybrid",
+        **kwargs: Any
+    ) -> str:
+        """Async counterpart of ``_run`` using ``AsyncAirweaveSDK``.
+
+        The async client is created outside the ``try`` block, so a missing
+        SDK or API key raises instead of being returned as a message.
+        """
+        async_client = self._get_async_client()
+
+        try:
+            search_kwargs = self._search_kwargs(
+                query,
+                limit,
+                offset,
+                response_type,
+                source_filter,
+                score_threshold,
+                recency_bias,
+                enable_reranking,
+                search_method,
+            )
+            response = await async_client.collections.search_advanced(**search_kwargs)
+
+            # Handle completion response
+            if search_kwargs["response_type"] == "completion":
+                if response.completion:
+                    return response.completion
+                return "Unable to generate an answer from available data."
+
+            # Handle raw results response
+            if response.status == "no_results":
+                return "No results found."
+
+            if response.status == "no_relevant_results":
+                return "Search completed but no sufficiently relevant results found."
+
+            return self._format_results(response.results, limit, source_filter)
+
+        except Exception as e:
+            return f"Error in async advanced search: {str(e)}"
+
+    def _format_results(
+        self,
+        results: List[dict],
+        limit: int,
+        source_filter: Optional[str] = None
+    ) -> str:
+        """Render raw search results as human-readable text.
+
+        Args:
+            results: Result dictionaries with optional ``score`` and ``payload``.
+            limit: Maximum number of results to render.
+            source_filter: Source name mentioned in the header when provided.
+
+        Returns:
+            The formatted results, or "No results found." for an empty list.
+        """
+        if not results:
+            return "No results found."
+
+        # Count what is actually rendered so the header matches the body.
+        shown = results[:limit]
+        header = f"Found {len(shown)} result(s)"
+        if source_filter:
+            header += f" from {source_filter}"
+        header += ":\n"
+
+        formatted = [header]
+
+        for idx, result in enumerate(shown, 1):
+            # ``or`` also covers keys that are present but set to None.
+            payload = result.get("payload") or {}
+            score = result.get("score") or 0.0
+
+            formatted.append(f"\n--- Result {idx} (Score: {score:.3f}) ---")
+
+            content = payload.get("md_content", "")
+            if content:
+                if len(content) > self.max_content_length:
+                    content = content[:self.max_content_length] + "..."
+                formatted.append(f"Content: {content}")
+
+            for key, label in _METADATA_LABELS:
+                if key in payload:
+                    formatted.append(f"{label}: {payload[key]}")
+
+        return "\n".join(formatted)
+
+
diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py
new file mode 100644
index 0000000000..d9d3069572
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py
@@ -0,0 +1,325 @@
+"""Airweave Search Tool for CrewAI.
+
+Search across connected data sources (Stripe, GitHub, Notion, Slack, etc.)
+using Airweave's unified search API.
+"""
+
+import os
+from typing import Any, Dict, List, Optional, Type
+
+from crewai.tools import BaseTool, EnvVar
+from pydantic import BaseModel, Field
+
+try:
+    from airweave import AirweaveSDK
+except ImportError:
+    # airweave-sdk is an optional extra. Binding the name at module level keeps
+    # this module importable without it and gives tests a stable patch target;
+    # the tool raises a descriptive ImportError when it is constructed.
+    AirweaveSDK = None
+
+# Accepted values; anything else falls back to "raw".
+_RESPONSE_TYPES = ("raw", "completion")
+
+# Payload keys rendered under each result, with their display labels.
+_METADATA_LABELS = (
+    ("source_name", "Source"),
+    ("entity_id", "Entity ID"),
+    ("created_at", "Created"),
+    ("url", "URL"),
+)
+
+
+class AirweaveSearchToolSchema(BaseModel):
+    """Input schema for AirweaveSearchTool."""
+
+    query: str = Field(
+        ...,
+        description="The search query to find relevant information from your connected data sources"
+    )
+    limit: Optional[int] = Field(
+        default=10,
+        ge=1,
+        le=100,
+        description="Maximum number of results to return (1-100)"
+    )
+    offset: Optional[int] = Field(
+        default=0,
+        ge=0,
+        description="Number of results to skip for pagination"
+    )
+    response_type: Optional[str] = Field(
+        default="raw",
+        description="Response format: 'raw' for search results or 'completion' for AI-generated answer"
+    )
+    recency_bias: Optional[float] = Field(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="Weight for recent results (0.0=no bias, 1.0=only recency)"
+    )
+
+
+class AirweaveSearchTool(BaseTool):
+    """
+    Search across all connected data sources in an Airweave collection.
+
+    This tool enables agents to search through any data source connected to Airweave,
+    including Stripe, GitHub, Notion, Slack, HubSpot, Zendesk, and 50+ other integrations.
+
+    Mirrors the client.collections.search() method from the Airweave Python SDK.
+    Use this for straightforward searches. For advanced filtering and reranking,
+    use AirweaveAdvancedSearchTool.
+    """
+
+    model_config = {"arbitrary_types_allowed": True}
+
+    name: str = "Airweave Search"
+    description: str = (
+        "Search across all connected data sources in your Airweave collection. "
+        "Use this to find information from Stripe, GitHub, Notion, Slack, and other integrated apps. "
+        "Supports both raw search results and AI-generated answers via response_type parameter."
+    )
+    args_schema: Type[BaseModel] = AirweaveSearchToolSchema
+
+    # Required configuration
+    collection_id: str = Field(
+        ...,
+        description="The readable ID of the Airweave collection to search"
+    )
+
+    # Optional configuration
+    base_url: Optional[str] = Field(
+        default=None,
+        description="Custom Airweave API base URL (for self-hosted instances)"
+    )
+    max_content_length: int = Field(
+        default=300,
+        description="Maximum content length to display per result"
+    )
+
+    # Dependencies
+    package_dependencies: List[str] = ["airweave-sdk"]
+    env_vars: List[EnvVar] = [
+        EnvVar(
+            name="AIRWEAVE_API_KEY",
+            description="API key for Airweave (get from https://app.airweave.ai)",
+            required=True
+        ),
+    ]
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Validate the environment and create the synchronous SDK client.
+
+        Raises:
+            ImportError: If the ``airweave-sdk`` package is not installed.
+            ValueError: If ``AIRWEAVE_API_KEY`` is not set.
+        """
+        super().__init__(**kwargs)
+
+        if AirweaveSDK is None:
+            raise ImportError(
+                "Missing required package 'airweave-sdk'. Install with:\n"
+                " pip install airweave-sdk\n"
+                "or\n"
+                " pip install 'crewai-tools[airweave]'"
+            )
+
+        self._client = AirweaveSDK(**self._client_kwargs())
+
+    def _client_kwargs(self) -> Dict[str, Any]:
+        """Build the constructor arguments shared by the sync and async clients.
+
+        Raises:
+            ValueError: If ``AIRWEAVE_API_KEY`` is not set.
+        """
+        api_key = os.getenv("AIRWEAVE_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "AIRWEAVE_API_KEY environment variable is required. "
+                "Get your API key from https://app.airweave.ai"
+            )
+
+        # The version is only passed along as ``framework_version``, so any
+        # failure to resolve it is deliberately downgraded to "unknown".
+        try:
+            from importlib.metadata import version
+            package_version = version("crewai-tools")
+        except Exception:
+            package_version = "unknown"
+
+        client_kwargs: Dict[str, Any] = {
+            "api_key": api_key,
+            "framework_name": "crewai",
+            "framework_version": package_version,
+        }
+        if self.base_url:
+            client_kwargs["base_url"] = self.base_url
+        return client_kwargs
+
+    def _get_async_client(self) -> Any:
+        """Return the async SDK client, creating it on first use."""
+        if not hasattr(self, "_async_client"):
+            from airweave import AsyncAirweaveSDK
+
+            self._async_client = AsyncAirweaveSDK(**self._client_kwargs())
+        return self._async_client
+
+    def _search_kwargs(
+        self,
+        query: str,
+        limit: int,
+        offset: int,
+        response_type: str,
+        recency_bias: float,
+    ) -> Dict[str, Any]:
+        """Normalize tool arguments into ``collections.search`` keyword arguments."""
+        # An unrecognized response_type falls back to "raw" rather than failing.
+        if response_type not in _RESPONSE_TYPES:
+            response_type = "raw"
+
+        return {
+            "readable_id": self.collection_id,
+            "query": query,
+            "limit": limit,
+            "offset": offset,
+            "response_type": response_type,
+            "recency_bias": recency_bias,
+        }
+
+    def _run(
+        self,
+        query: str,
+        limit: int = 10,
+        offset: int = 0,
+        response_type: str = "raw",
+        recency_bias: float = 0.0,
+        **kwargs: Any
+    ) -> str:
+        """
+        Execute search and return results.
+
+        Args:
+            query: Search query string
+            limit: Maximum number of results to return
+            offset: Number of results to skip for pagination
+            response_type: 'raw' for search results or 'completion' for AI answer
+            recency_bias: Weight for recent results (0.0-1.0)
+
+        Returns:
+            Formatted string containing search results or AI-generated answer
+        """
+        try:
+            search_kwargs = self._search_kwargs(
+                query, limit, offset, response_type, recency_bias
+            )
+            response = self._client.collections.search(**search_kwargs)
+
+            # Handle completion response
+            if search_kwargs["response_type"] == "completion":
+                if response.completion:
+                    return response.completion
+                return "Unable to generate an answer from available data. Try rephrasing your question."
+
+            # Handle raw results response
+            if response.status == "no_results":
+                return "No results found for your query."
+
+            if response.status == "no_relevant_results":
+                return "Search completed but no sufficiently relevant results were found. Try rephrasing your query."
+
+            # Format and return results
+            return self._format_results(response.results, limit)
+
+        except Exception as e:
+            return f"Error performing search: {str(e)}"
+
+    async def _arun(
+        self,
+        query: str,
+        limit: int = 10,
+        offset: int = 0,
+        response_type: str = "raw",
+        recency_bias: float = 0.0,
+        **kwargs: Any
+    ) -> str:
+        """
+        Async implementation using AsyncAirweaveSDK.
+
+        Args:
+            query: Search query string
+            limit: Maximum number of results to return
+            offset: Number of results to skip for pagination
+            response_type: 'raw' for search results or 'completion' for AI answer
+            recency_bias: Weight for recent results (0.0-1.0)
+
+        Returns:
+            Formatted string containing search results or AI-generated answer
+        """
+        # Created outside the try block so a missing SDK or API key raises
+        # instead of being returned as a message string.
+        async_client = self._get_async_client()
+
+        try:
+            search_kwargs = self._search_kwargs(
+                query, limit, offset, response_type, recency_bias
+            )
+            response = await async_client.collections.search(**search_kwargs)
+
+            # Handle completion response
+            if search_kwargs["response_type"] == "completion":
+                if response.completion:
+                    return response.completion
+                return "Unable to generate an answer from available data."
+
+            # Handle raw results response
+            if response.status == "no_results":
+                return "No results found for your query."
+
+            if response.status == "no_relevant_results":
+                return "Search completed but no sufficiently relevant results were found."
+
+            return self._format_results(response.results, limit)
+
+        except Exception as e:
+            return f"Error performing async search: {str(e)}"
+
+    def _format_results(self, results: List[dict], limit: int) -> str:
+        """
+        Format search results for agent consumption.
+
+        Args:
+            results: List of search result dictionaries
+            limit: Maximum number of results to format
+
+        Returns:
+            Human-readable formatted string
+        """
+        if not results:
+            return "No results found."
+
+        # Count what is actually rendered so the header matches the body.
+        shown = results[:limit]
+        formatted = [f"Found {len(shown)} result(s):\n"]
+
+        for idx, result in enumerate(shown, 1):
+            # ``or`` also covers keys that are present but set to None.
+            payload = result.get("payload") or {}
+            score = result.get("score") or 0.0
+
+            formatted.append(f"\n--- Result {idx} (Score: {score:.3f}) ---")
+
+            # Content (truncate if too long)
+            content = payload.get("md_content", "")
+            if content:
+                if len(content) > self.max_content_length:
+                    content = content[:self.max_content_length] + "..."
+                formatted.append(f"Content: {content}")
+
+            for key, label in _METADATA_LABELS:
+                if key in payload:
+                    formatted.append(f"{label}: {payload[key]}")
+
+        return "\n".join(formatted)
+
+
diff --git a/lib/crewai-tools/tests/tools/airweave_tool_test.py b/lib/crewai-tools/tests/tools/airweave_tool_test.py
new file mode 100644
index 0000000000..5b74b60588
--- /dev/null
+++ b/lib/crewai-tools/tests/tools/airweave_tool_test.py
@@ -0,0 +1,333 @@
+"""Unit tests for Airweave tools."""
+
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from crewai_tools.tools.airweave_tool import (
+    AirweaveAdvancedSearchTool,
+    AirweaveSearchTool,
+)
+
+
+@pytest.fixture
+def mock_env(monkeypatch):
+    """Set up environment variables."""
+    monkeypatch.setenv("AIRWEAVE_API_KEY", "test_api_key_12345")
+
+
+@pytest.fixture
+def mock_search_response():
+    """Create mock search response with raw results."""
+    return Mock(
+        status="success",
+        results=[
+            {
+                "score": 0.95,
+                "payload": {
+                    "md_content": "Test content from Stripe about a customer payment",
+                    "source_name": "Stripe",
+                    "entity_id": "cus_123",
+                    "created_at": "2024-01-15T10:00:00Z",
+                    "url": "https://stripe.com/customers/cus_123"
+                }
+            },
+            {
+                "score": 0.87,
+                "payload": {
+                    "md_content": "GitHub issue about payment integration bug",
+                    "source_name": "GitHub",
+                    "entity_id": "issue_456",
+                    "created_at": "2024-01-14T15:30:00Z"
+                }
+            }
+        ],
+        response_type="raw",
+        completion=None
+    )
+
+
+@pytest.fixture
+def mock_completion_response():
+    """Create mock search response with completion."""
+    return Mock(
+        status="success",
+        results=[],
+        response_type="completion",
+        completion="Based on the data from Stripe and GitHub, there were 3 failed payments in the last month due to 
expired cards." + ) + + +@pytest.fixture +def mock_no_results_response(): + """Create mock response with no results.""" + return Mock( + status="no_results", + results=[], + response_type="raw", + completion=None + ) + + +class TestAirweaveSearchTool: + """Tests for AirweaveSearchTool.""" + + def test_requires_api_key(self, monkeypatch): + """Test that tool requires API key.""" + monkeypatch.delenv("AIRWEAVE_API_KEY", raising=False) + with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): + AirweaveSearchTool(collection_id="test-collection") + + def test_initialization_with_valid_api_key(self, mock_env): + """Test successful initialization with API key.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK"): + tool = AirweaveSearchTool(collection_id="test-collection") + assert tool.collection_id == "test-collection" + assert tool.name == "Airweave Search" + + def test_basic_search_raw_results(self, mock_env, mock_search_response): + """Test basic search with raw results.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="find failed payments", limit=5) + + # Verify API call + mock_client.collections.search.assert_called_once_with( + readable_id="test-collection", + query="find failed payments", + limit=5, + response_type="raw", + recency_bias=0.0 + ) + + # Verify result format + assert "Found 2 result" in result + assert "Test content from Stripe" in result + assert "Stripe" in result + assert "0.950" in result + assert "GitHub issue" in result + + def test_search_with_completion(self, mock_env, mock_completion_response): + """Test search requesting AI-generated completion.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + 
mock_client = Mock() + mock_client.collections.search.return_value = mock_completion_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="what are the payment issues?", response_type="completion") + + # Verify API call + mock_client.collections.search.assert_called_once_with( + readable_id="test-collection", + query="what are the payment issues?", + limit=10, + response_type="completion", + recency_bias=0.0 + ) + + # Verify completion response + assert "Based on the data from Stripe and GitHub" in result + assert "3 failed payments" in result + + def test_no_results_handling(self, mock_env, mock_no_results_response): + """Test handling of no results.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search.return_value = mock_no_results_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="nonexistent query") + + assert "No results found" in result + + def test_no_relevant_results_handling(self, mock_env): + """Test handling of no relevant results.""" + mock_response = Mock( + status="no_relevant_results", + results=[], + response_type="raw", + completion=None + ) + + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search.return_value = mock_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="vague query") + + assert "no sufficiently relevant results" in result + + def test_error_handling(self, mock_env): + """Test API error handling.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search.side_effect = Exception("API Error: Collection not 
found") + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="test query") + + assert "Error performing search" in result + assert "API Error" in result + + def test_custom_base_url(self, mock_env): + """Test initialization with custom base URL.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + tool = AirweaveSearchTool( + collection_id="test-collection", + base_url="http://localhost:8001" + ) + + # Verify SDK initialized with custom URL + MockSDK.assert_called_once() + call_kwargs = MockSDK.call_args[1] + assert call_kwargs["base_url"] == "http://localhost:8001" + + def test_recency_bias(self, mock_env, mock_search_response): + """Test search with recency bias.""" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + tool.run(query="recent issues", recency_bias=0.8) + + # Verify recency_bias passed correctly + call_kwargs = mock_client.collections.search.call_args[1] + assert call_kwargs["recency_bias"] == 0.8 + + +class TestAirweaveAdvancedSearchTool: + """Tests for AirweaveAdvancedSearchTool.""" + + def test_requires_api_key(self, monkeypatch): + """Test that tool requires API key.""" + monkeypatch.delenv("AIRWEAVE_API_KEY", raising=False) + with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): + AirweaveAdvancedSearchTool(collection_id="test-collection") + + def test_initialization(self, mock_env): + """Test successful initialization.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK"): + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + assert tool.collection_id == "test-collection" + assert tool.name == "Airweave Advanced Search" + + def 
test_advanced_search_with_source_filter(self, mock_env, mock_search_response): + """Test advanced search with source filtering.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run( + query="payment issues", + source_filter="Stripe", + limit=10, + enable_reranking=True + ) + + # Verify API call with filter + mock_client.collections.search_advanced.assert_called_once() + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["query"] == "payment issues" + assert call_kwargs["limit"] == 10 + assert call_kwargs["enable_reranking"] is True + assert call_kwargs["filter"] is not None # Filter object created + + # Verify result includes source info + assert "from Stripe" in result + + def test_advanced_search_with_score_threshold(self, mock_env, mock_search_response): + """Test advanced search with score threshold.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + tool.run(query="test", score_threshold=0.8) + + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["score_threshold"] == 0.8 + + def test_advanced_search_with_search_method(self, mock_env, mock_search_response): + """Test advanced search with different search methods.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = 
mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + + # Test neural search + tool.run(query="test", search_method="neural") + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["search_method"] == "neural" + + def test_advanced_search_completion_mode(self, mock_env, mock_completion_response): + """Test advanced search with completion response type.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_completion_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run( + query="summarize payment issues", + response_type="completion", + source_filter="Stripe" + ) + + assert "Based on the data" in result + assert "3 failed payments" in result + + def test_advanced_search_no_results(self, mock_env, mock_no_results_response): + """Test advanced search with no results.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_no_results_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run(query="nonexistent", score_threshold=0.99) + + assert "No results found" in result + + def test_advanced_search_error_handling(self, mock_env): + """Test advanced search error handling.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.side_effect = Exception("Filter error") + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run(query="test") + + assert "Error performing advanced search" in result + + 
def test_recency_bias_default(self, mock_env, mock_search_response): + """Test that advanced search has default recency bias of 0.3.""" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + tool.run(query="test") + + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["recency_bias"] == 0.3 + + From 77c79f04e0f77ffa28995782daef2fdbd2d24125 Mon Sep 17 00:00:00 2001 From: EwanTauran Date: Mon, 1 Dec 2025 16:54:23 -0800 Subject: [PATCH 2/4] feat: enhance Airweave tools with interactive installation and improved error handling - Added module-level import checks for AirweaveSDK and AsyncAirweaveSDK with an interactive installation prompt if missing. - Updated error messages for better user guidance when no results or relevant results are found. - Refactored client initialization to streamline both synchronous and asynchronous client setup. - Improved overall robustness of AirweaveAdvancedSearchTool and AirweaveSearchTool. 
--- .../airweave_advanced_search_tool.py | 82 +++++++++---------- .../airweave_tool/airweave_search_tool.py | 73 ++++++++--------- 2 files changed, 74 insertions(+), 81 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py index d7aa8c0147..6f07ae789b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py @@ -6,6 +6,18 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field +# Module-level import with availability flag +try: + from airweave import AirweaveSDK, AsyncAirweaveSDK, FieldCondition, Filter, MatchValue + AIRWEAVE_AVAILABLE = True +except ImportError: + AIRWEAVE_AVAILABLE = False + AirweaveSDK = Any # type: ignore + AsyncAirweaveSDK = Any # type: ignore + FieldCondition = Any # type: ignore + Filter = Any # type: ignore + MatchValue = Any # type: ignore + class AirweaveAdvancedSearchToolSchema(BaseModel): """Input schema for AirweaveAdvancedSearchTool.""" @@ -111,16 +123,24 @@ def __init__(self, **kwargs: Any) -> None: """Initialize the advanced search tool.""" super().__init__(**kwargs) - # Lazy import - try: - from airweave import AirweaveSDK - except ImportError: - raise ImportError( - "Missing required package 'airweave-sdk'. Install with:\n" - " pip install airweave-sdk\n" - "or\n" - " pip install 'crewai-tools[airweave]'" - ) + # Check if package is available, offer interactive installation + if not AIRWEAVE_AVAILABLE: + import click + + if click.confirm( + "You are missing the 'airweave-sdk' package. Would you like to install it?" 
+ ): + import subprocess + try: + subprocess.run(["uv", "add", "airweave-sdk"], check=True) # noqa: S607 + # Import after installation + from airweave import AirweaveSDK, AsyncAirweaveSDK + except subprocess.CalledProcessError as e: + raise ImportError("Failed to install airweave-sdk package") from e + else: + raise ImportError( + "`airweave-sdk` package not found, please run `uv add airweave-sdk`" + ) from None # Validate API key api_key = os.getenv("AIRWEAVE_API_KEY") @@ -129,14 +149,14 @@ def __init__(self, **kwargs: Any) -> None: "AIRWEAVE_API_KEY environment variable is required." ) - # Get version safely + # Get version safely (only once) try: from importlib.metadata import version package_version = version("crewai-tools") except Exception: package_version = "unknown" - # Initialize client + # Initialize client kwargs client_kwargs = { "api_key": api_key, "framework_name": "crewai", @@ -145,7 +165,10 @@ def __init__(self, **kwargs: Any) -> None: if self.base_url: client_kwargs["base_url"] = self.base_url + # Initialize both sync and async clients + from airweave import AirweaveSDK, AsyncAirweaveSDK self._client = AirweaveSDK(**client_kwargs) + self._async_client = AsyncAirweaveSDK(**client_kwargs) def _run( self, @@ -203,14 +226,14 @@ def _run( if response.completion: return response.completion else: - return "Unable to generate an answer from available data. Try rephrasing your question." + return "Unable to generate an answer from available data. Try rephrasing your question or adjusting filters." # Handle raw results response if response.status == "no_results": return "No results found for your query." if response.status == "no_relevant_results": - return "Search completed but no sufficiently relevant results were found. Try adjusting filters or threshold." + return "Search completed but no sufficiently relevant results were found. Try adjusting filters, lowering score threshold, or rephrasing your query." 
return self._format_results(response.results, limit, source_filter) @@ -231,29 +254,6 @@ async def _arun( **kwargs: Any ) -> str: """Async implementation of advanced search.""" - # Initialize async client if needed - if not hasattr(self, "_async_client"): - from airweave import AsyncAirweaveSDK - - api_key = os.getenv("AIRWEAVE_API_KEY") - - # Get version safely - try: - from importlib.metadata import version - package_version = version("crewai-tools") - except Exception: - package_version = "unknown" - - client_kwargs = { - "api_key": api_key, - "framework_name": "crewai", - "framework_version": package_version, - } - if self.base_url: - client_kwargs["base_url"] = self.base_url - - self._async_client = AsyncAirweaveSDK(**client_kwargs) - try: # Validate response_type if response_type not in ["raw", "completion"]: @@ -296,14 +296,14 @@ async def _arun( if response.completion: return response.completion else: - return "Unable to generate an answer from available data." + return "Unable to generate an answer from available data. Try rephrasing your question or adjusting filters." # Handle raw results response if response.status == "no_results": - return "No results found." + return "No results found for your query." if response.status == "no_relevant_results": - return "Search completed but no sufficiently relevant results found." + return "Search completed but no sufficiently relevant results were found. Try adjusting filters, lowering score threshold, or rephrasing your query." 
return self._format_results(response.results, limit, source_filter) @@ -354,5 +354,3 @@ def _format_results( formatted.append(f"URL: {payload['url']}") return "\n".join(formatted) - - diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py index d9d3069572..203295f9ec 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py @@ -10,6 +10,15 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field +# Module-level import with availability flag +try: + from airweave import AirweaveSDK, AsyncAirweaveSDK + AIRWEAVE_AVAILABLE = True +except ImportError: + AIRWEAVE_AVAILABLE = False + AirweaveSDK = Any # type: ignore + AsyncAirweaveSDK = Any # type: ignore + class AirweaveSearchToolSchema(BaseModel): """Input schema for AirweaveSearchTool.""" @@ -93,16 +102,24 @@ def __init__(self, **kwargs: Any) -> None: """Initialize the Airweave search tool.""" super().__init__(**kwargs) - # Lazy import - try: - from airweave import AirweaveSDK - except ImportError: - raise ImportError( - "Missing required package 'airweave-sdk'. Install with:\n" - " pip install airweave-sdk\n" - "or\n" - " pip install 'crewai-tools[airweave]'" - ) + # Check if package is available, offer interactive installation + if not AIRWEAVE_AVAILABLE: + import click + + if click.confirm( + "You are missing the 'airweave-sdk' package. Would you like to install it?" 
+ ): + import subprocess + try: + subprocess.run(["uv", "add", "airweave-sdk"], check=True) # noqa: S607 + # Import after installation + from airweave import AirweaveSDK, AsyncAirweaveSDK + except subprocess.CalledProcessError as e: + raise ImportError("Failed to install airweave-sdk package") from e + else: + raise ImportError( + "`airweave-sdk` package not found, please run `uv add airweave-sdk`" + ) from None # Validate API key api_key = os.getenv("AIRWEAVE_API_KEY") @@ -112,14 +129,14 @@ def __init__(self, **kwargs: Any) -> None: "Get your API key from https://app.airweave.ai" ) - # Get version safely + # Get version safely (only once) try: from importlib.metadata import version package_version = version("crewai-tools") except Exception: package_version = "unknown" - # Initialize client + # Initialize client kwargs client_kwargs = { "api_key": api_key, "framework_name": "crewai", @@ -128,7 +145,10 @@ def __init__(self, **kwargs: Any) -> None: if self.base_url: client_kwargs["base_url"] = self.base_url + # Initialize both sync and async clients + from airweave import AirweaveSDK, AsyncAirweaveSDK self._client = AirweaveSDK(**client_kwargs) + self._async_client = AsyncAirweaveSDK(**client_kwargs) def _run( self, @@ -208,29 +228,6 @@ async def _arun( Returns: Formatted string containing search results or AI-generated answer """ - # Initialize async client if needed - if not hasattr(self, "_async_client"): - from airweave import AsyncAirweaveSDK - - api_key = os.getenv("AIRWEAVE_API_KEY") - - # Get version safely - try: - from importlib.metadata import version - package_version = version("crewai-tools") - except Exception: - package_version = "unknown" - - client_kwargs = { - "api_key": api_key, - "framework_name": "crewai", - "framework_version": package_version, - } - if self.base_url: - client_kwargs["base_url"] = self.base_url - - self._async_client = AsyncAirweaveSDK(**client_kwargs) - try: # Validate response_type if response_type not in ["raw", 
"completion"]: @@ -250,14 +247,14 @@ async def _arun( if response.completion: return response.completion else: - return "Unable to generate an answer from available data." + return "Unable to generate an answer from available data. Try rephrasing your question." # Handle raw results response if response.status == "no_results": return "No results found for your query." if response.status == "no_relevant_results": - return "Search completed but no sufficiently relevant results were found." + return "Search completed but no sufficiently relevant results were found. Try rephrasing your query." return self._format_results(response.results, limit) @@ -307,5 +304,3 @@ def _format_results(self, results: List[dict], limit: int) -> str: formatted.append(f"URL: {payload['url']}") return "\n".join(formatted) - - From 08fcf63d2119080fab7e12ef115ec3699a32084f Mon Sep 17 00:00:00 2001 From: EwanTauran Date: Mon, 1 Dec 2025 17:05:11 -0800 Subject: [PATCH 3/4] feat: update Airweave tools and dependencies - Added airweave-sdk dependency to enhance functionality in Airweave tools. - Refactored imports in AirweaveAdvancedSearchTool and AirweaveSearchTool to avoid scope issues during testing. - Updated tests to ensure proper initialization and error handling for Airweave tools. - Improved handling of source filters and search methods in the Airweave tools. 
--- .../airweave_advanced_search_tool.py | 25 +- .../airweave_tool/airweave_search_tool.py | 7 +- .../tests/tools/airweave_tool_test.py | 396 ++++++++++-------- uv.lock | 59 ++- 4 files changed, 277 insertions(+), 210 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py index 6f07ae789b..04c437150c 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py @@ -166,9 +166,10 @@ def __init__(self, **kwargs: Any) -> None: client_kwargs["base_url"] = self.base_url # Initialize both sync and async clients - from airweave import AirweaveSDK, AsyncAirweaveSDK - self._client = AirweaveSDK(**client_kwargs) - self._async_client = AsyncAirweaveSDK(**client_kwargs) + # Import from module namespace to avoid scope issues + import crewai_tools.tools.airweave_tool.airweave_advanced_search_tool as airweave_module + self._client = airweave_module.AirweaveSDK(**client_kwargs) + self._async_client = airweave_module.AsyncAirweaveSDK(**client_kwargs) def _run( self, @@ -196,13 +197,14 @@ def _run( # Build filter if source_filter provided filter_obj = None if source_filter: - from airweave import FieldCondition, Filter, MatchValue + # Use module-level imports to avoid scope issues in tests + import crewai_tools.tools.airweave_tool.airweave_advanced_search_tool as airweave_module - filter_obj = Filter( + filter_obj = airweave_module.Filter( must=[ - FieldCondition( + airweave_module.FieldCondition( key="source_name", - match=MatchValue(value=source_filter) + match=airweave_module.MatchValue(value=source_filter) ) ] ) @@ -266,13 +268,14 @@ async def _arun( # Build filter filter_obj = None if source_filter: - from airweave import FieldCondition, Filter, MatchValue + # Use module-level imports to avoid scope 
issues in tests + import crewai_tools.tools.airweave_tool.airweave_advanced_search_tool as airweave_module - filter_obj = Filter( + filter_obj = airweave_module.Filter( must=[ - FieldCondition( + airweave_module.FieldCondition( key="source_name", - match=MatchValue(value=source_filter) + match=airweave_module.MatchValue(value=source_filter) ) ] ) diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py index 203295f9ec..a8afd7eaf1 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py @@ -146,9 +146,10 @@ def __init__(self, **kwargs: Any) -> None: client_kwargs["base_url"] = self.base_url # Initialize both sync and async clients - from airweave import AirweaveSDK, AsyncAirweaveSDK - self._client = AirweaveSDK(**client_kwargs) - self._async_client = AsyncAirweaveSDK(**client_kwargs) + # Import from module namespace to avoid scope issues + import crewai_tools.tools.airweave_tool.airweave_search_tool as airweave_module + self._client = airweave_module.AirweaveSDK(**client_kwargs) + self._async_client = airweave_module.AsyncAirweaveSDK(**client_kwargs) def _run( self, diff --git a/lib/crewai-tools/tests/tools/airweave_tool_test.py b/lib/crewai-tools/tests/tools/airweave_tool_test.py index 5b74b60588..225bd1d73b 100644 --- a/lib/crewai-tools/tests/tools/airweave_tool_test.py +++ b/lib/crewai-tools/tests/tools/airweave_tool_test.py @@ -76,76 +76,89 @@ class TestAirweaveSearchTool: def test_requires_api_key(self, monkeypatch): """Test that tool requires API key.""" monkeypatch.delenv("AIRWEAVE_API_KEY", raising=False) - with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): - AirweaveSearchTool(collection_id="test-collection") + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with 
patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): + AirweaveSearchTool(collection_id="test-collection") def test_initialization_with_valid_api_key(self, mock_env): """Test successful initialization with API key.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK"): - tool = AirweaveSearchTool(collection_id="test-collection") - assert tool.collection_id == "test-collection" - assert tool.name == "Airweave Search" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + tool = AirweaveSearchTool(collection_id="test-collection") + assert tool.collection_id == "test-collection" + assert tool.name == "Airweave Search" def test_basic_search_raw_results(self, mock_env, mock_search_response): """Test basic search with raw results.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search.return_value = mock_search_response - MockSDK.return_value = mock_client - - tool = AirweaveSearchTool(collection_id="test-collection") - result = tool.run(query="find failed payments", limit=5) - - # Verify API call - mock_client.collections.search.assert_called_once_with( - readable_id="test-collection", - query="find failed payments", - limit=5, - response_type="raw", - recency_bias=0.0 - ) - - # Verify result format - assert "Found 2 result" in result - assert "Test content from Stripe" in result - assert "Stripe" in result - assert "0.950" in result - assert "GitHub issue" in result + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + 
with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="find failed payments", limit=5) + + # Verify API call + mock_client.collections.search.assert_called_once_with( + readable_id="test-collection", + query="find failed payments", + limit=5, + offset=0, + response_type="raw", + recency_bias=0.0 + ) + + # Verify result format + assert "Found 2 result" in result + assert "Test content from Stripe" in result + assert "Stripe" in result + assert "0.950" in result + assert "GitHub issue" in result def test_search_with_completion(self, mock_env, mock_completion_response): """Test search requesting AI-generated completion.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search.return_value = mock_completion_response - MockSDK.return_value = mock_client - - tool = AirweaveSearchTool(collection_id="test-collection") - result = tool.run(query="what are the payment issues?", response_type="completion") - - # Verify API call - mock_client.collections.search.assert_called_once_with( - readable_id="test-collection", - query="what are the payment issues?", - limit=10, - response_type="completion", - recency_bias=0.0 - ) - - # Verify completion response - assert "Based on the data from Stripe and GitHub" in result - assert "3 failed payments" in result + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = 
Mock() + mock_client.collections.search.return_value = mock_completion_response + MockSDK.return_value = mock_client + + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="what are the payment issues?", response_type="completion") + + # Verify API call + mock_client.collections.search.assert_called_once_with( + readable_id="test-collection", + query="what are the payment issues?", + limit=10, + offset=0, + response_type="completion", + recency_bias=0.0 + ) + + # Verify completion response + assert "Based on the data from Stripe and GitHub" in result + assert "3 failed payments" in result def test_no_results_handling(self, mock_env, mock_no_results_response): """Test handling of no results.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search.return_value = mock_no_results_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search.return_value = mock_no_results_response + MockSDK.return_value = mock_client - tool = AirweaveSearchTool(collection_id="test-collection") - result = tool.run(query="nonexistent query") + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="nonexistent query") - assert "No results found" in result + assert "No results found" in result def test_no_relevant_results_handling(self, mock_env): """Test handling of no relevant results.""" @@ -156,55 +169,63 @@ def test_no_relevant_results_handling(self, mock_env): completion=None ) - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - 
mock_client.collections.search.return_value = mock_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search.return_value = mock_response + MockSDK.return_value = mock_client - tool = AirweaveSearchTool(collection_id="test-collection") - result = tool.run(query="vague query") + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="vague query") - assert "no sufficiently relevant results" in result + assert "no sufficiently relevant results" in result def test_error_handling(self, mock_env): """Test API error handling.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search.side_effect = Exception("API Error: Collection not found") - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search.side_effect = Exception("API Error: Collection not found") + MockSDK.return_value = mock_client - tool = AirweaveSearchTool(collection_id="test-collection") - result = tool.run(query="test query") + tool = AirweaveSearchTool(collection_id="test-collection") + result = tool.run(query="test query") - assert "Error performing search" in result - assert "API Error" in result + assert "Error performing search" in result + assert "API Error" in result def test_custom_base_url(self, mock_env): """Test initialization with custom base 
URL.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - tool = AirweaveSearchTool( - collection_id="test-collection", - base_url="http://localhost:8001" - ) - - # Verify SDK initialized with custom URL - MockSDK.assert_called_once() - call_kwargs = MockSDK.call_args[1] - assert call_kwargs["base_url"] == "http://localhost:8001" + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + tool = AirweaveSearchTool( + collection_id="test-collection", + base_url="http://localhost:8001" + ) + + # Verify SDK initialized with custom URL + MockSDK.assert_called_once() + call_kwargs = MockSDK.call_args[1] + assert call_kwargs["base_url"] == "http://localhost:8001" def test_recency_bias(self, mock_env, mock_search_response): """Test search with recency bias.""" - with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search.return_value = mock_search_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search.return_value = mock_search_response + MockSDK.return_value = mock_client - tool = AirweaveSearchTool(collection_id="test-collection") - tool.run(query="recent issues", recency_bias=0.8) + tool = AirweaveSearchTool(collection_id="test-collection") + tool.run(query="recent issues", recency_bias=0.8) - # Verify recency_bias passed correctly - call_kwargs = mock_client.collections.search.call_args[1] - assert 
call_kwargs["recency_bias"] == 0.8 + # Verify recency_bias passed correctly + call_kwargs = mock_client.collections.search.call_args[1] + assert call_kwargs["recency_bias"] == 0.8 class TestAirweaveAdvancedSearchTool: @@ -213,121 +234,144 @@ class TestAirweaveAdvancedSearchTool: def test_requires_api_key(self, monkeypatch): """Test that tool requires API key.""" monkeypatch.delenv("AIRWEAVE_API_KEY", raising=False) - with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): - AirweaveAdvancedSearchTool(collection_id="test-collection") + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + with pytest.raises(ValueError, match="AIRWEAVE_API_KEY"): + AirweaveAdvancedSearchTool(collection_id="test-collection") def test_initialization(self, mock_env): """Test successful initialization.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK"): - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - assert tool.collection_id == "test-collection" - assert tool.name == "Airweave Advanced Search" + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + assert tool.collection_id == "test-collection" + assert tool.name == "Airweave Advanced Search" def test_advanced_search_with_source_filter(self, mock_env, mock_search_response): """Test advanced search with source filtering.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - 
mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_search_response - MockSDK.return_value = mock_client - - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - result = tool.run( - query="payment issues", - source_filter="Stripe", - limit=10, - enable_reranking=True - ) - - # Verify API call with filter - mock_client.collections.search_advanced.assert_called_once() - call_kwargs = mock_client.collections.search_advanced.call_args[1] - assert call_kwargs["query"] == "payment issues" - assert call_kwargs["limit"] == 10 - assert call_kwargs["enable_reranking"] is True - assert call_kwargs["filter"] is not None # Filter object created - - # Verify result includes source info - assert "from Stripe" in result + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.Filter") as MockFilter: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.FieldCondition") as MockFieldCondition: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.MatchValue") as MockMatchValue: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run( + query="payment issues", + source_filter="Stripe", + limit=10, + enable_reranking=True + ) + + # Verify API call with filter + mock_client.collections.search_advanced.assert_called_once() + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["query"] == "payment issues" + assert call_kwargs["limit"] == 10 + assert 
call_kwargs["enable_reranking"] is True + assert call_kwargs["filter"] is not None # Filter object created + + # Verify result includes source info + assert "from Stripe" in result def test_advanced_search_with_score_threshold(self, mock_env, mock_search_response): """Test advanced search with score threshold.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_search_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - tool.run(query="test", score_threshold=0.8) + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + tool.run(query="test", score_threshold=0.8) - call_kwargs = mock_client.collections.search_advanced.call_args[1] - assert call_kwargs["score_threshold"] == 0.8 + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["score_threshold"] == 0.8 def test_advanced_search_with_search_method(self, mock_env, mock_search_response): """Test advanced search with different search methods.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_search_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with 
patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - # Test neural search - tool.run(query="test", search_method="neural") - call_kwargs = mock_client.collections.search_advanced.call_args[1] - assert call_kwargs["search_method"] == "neural" + # Test neural search + tool.run(query="test", search_method="neural") + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["search_method"] == "neural" def test_advanced_search_completion_mode(self, mock_env, mock_completion_response): """Test advanced search with completion response type.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_completion_response - MockSDK.return_value = mock_client - - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - result = tool.run( - query="summarize payment issues", - response_type="completion", - source_filter="Stripe" - ) - - assert "Based on the data" in result - assert "3 failed payments" in result + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.Filter") as MockFilter: + with 
patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.FieldCondition") as MockFieldCondition: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.MatchValue") as MockMatchValue: + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_completion_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run( + query="summarize payment issues", + response_type="completion", + source_filter="Stripe" + ) + + assert "Based on the data" in result + assert "3 failed payments" in result def test_advanced_search_no_results(self, mock_env, mock_no_results_response): """Test advanced search with no results.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_no_results_response - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_no_results_response + MockSDK.return_value = mock_client - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - result = tool.run(query="nonexistent", score_threshold=0.99) + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run(query="nonexistent", score_threshold=0.99) - assert "No results found" in result + assert "No results found" in result def test_advanced_search_error_handling(self, mock_env): """Test advanced search error handling.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client 
= Mock() - mock_client.collections.search_advanced.side_effect = Exception("Filter error") - MockSDK.return_value = mock_client + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search_advanced.side_effect = Exception("Filter error") + MockSDK.return_value = mock_client - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - result = tool.run(query="test") + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + result = tool.run(query="test") - assert "Error performing advanced search" in result + assert "Error performing advanced search" in result def test_recency_bias_default(self, mock_env, mock_search_response): """Test that advanced search has default recency bias of 0.3.""" - with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: - mock_client = Mock() - mock_client.collections.search_advanced.return_value = mock_search_response - MockSDK.return_value = mock_client - - tool = AirweaveAdvancedSearchTool(collection_id="test-collection") - tool.run(query="test") - - call_kwargs = mock_client.collections.search_advanced.call_args[1] - assert call_kwargs["recency_bias"] == 0.3 - - + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AIRWEAVE_AVAILABLE", True): + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AirweaveSDK") as MockSDK: + with patch("crewai_tools.tools.airweave_tool.airweave_advanced_search_tool.AsyncAirweaveSDK"): + mock_client = Mock() + mock_client.collections.search_advanced.return_value = mock_search_response + MockSDK.return_value = mock_client + + tool = AirweaveAdvancedSearchTool(collection_id="test-collection") + 
tool.run(query="test") + + call_kwargs = mock_client.collections.search_advanced.call_args[1] + assert call_kwargs["recency_bias"] == 0.3 diff --git a/uv.lock b/uv.lock index 7d850e5950..fd0620bc03 100644 --- a/uv.lock +++ b/uv.lock @@ -247,6 +247,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] +[[package]] +name = "airweave-sdk" +version = "0.7.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/1a/b18f78a7becae27ef840c6cedad7ae3fc3a2f3cbbe0751f25226bccf3e11/airweave_sdk-0.7.18.tar.gz", hash = "sha256:48ea6abce4fdf5ce4976252392dcf1dc17699313284d08615888d32137f35a61", size = 56680, upload-time = "2025-11-28T22:23:11.38Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/f4/b6d98c2cb5c354cdec64435d69f31653f7b7d2c793721c5c1fcbc94609e4/airweave_sdk-0.7.18-py3-none-any.whl", hash = "sha256:5dc797021823ef84d6008d8d480b9e3345bddfa7e83b5132c40dfcd327062f95", size = 134536, upload-time = "2025-11-28T22:23:10.409Z" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -1266,6 +1281,9 @@ dependencies = [ ] [package.optional-dependencies] +airweave = [ + { name = "airweave-sdk" }, +] apify = [ { name = "langchain-apify" }, ] @@ -1383,6 +1401,7 @@ xml = [ [package.metadata] requires-dist = [ + { name = "airweave-sdk", marker = "extra == 'airweave'", specifier = ">=0.1.0" }, { name = "beautifulsoup4", specifier = "~=4.13.4" }, { name = "beautifulsoup4", marker = "extra == 'beautifulsoup4'", specifier = ">=4.12.3" }, { name = "beautifulsoup4", marker = "extra == 'bedrock'", specifier = ">=4.13.4" }, @@ 
-1438,7 +1457,7 @@ requires-dist = [ { name = "weaviate-client", marker = "extra == 'weaviate-client'", specifier = ">=4.10.2" }, { name = "youtube-transcript-api", specifier = "~=1.2.2" }, ] -provides-extras = ["apify", "beautifulsoup4", "bedrock", "browserbase", "composio-core", "contextual", "couchbase", "databricks-sdk", "exa-py", "firecrawl-py", "github", "hyperbrowser", "linkup-sdk", "mcp", "mongodb", "multion", "mysql", "oxylabs", "patronus", "postgresql", "qdrant-client", "rag", "scrapegraph-py", "scrapfly-sdk", "selenium", "serpapi", "singlestore", "snowflake", "spider-client", "sqlalchemy", "stagehand", "tavily-python", "weaviate-client", "xml"] +provides-extras = ["airweave", "apify", "beautifulsoup4", "bedrock", "browserbase", "composio-core", "contextual", "couchbase", "databricks-sdk", "exa-py", "firecrawl-py", "github", "hyperbrowser", "linkup-sdk", "mcp", "mongodb", "multion", "mysql", "oxylabs", "patronus", "postgresql", "qdrant-client", "rag", "scrapegraph-py", "scrapfly-sdk", "selenium", "serpapi", "singlestore", "snowflake", "spider-client", "sqlalchemy", "stagehand", "tavily-python", "weaviate-client", "xml"] [[package]] name = "cryptography" @@ -1843,7 +1862,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -4337,7 +4356,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = 
"(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -4348,7 +4367,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -4375,9 +4394,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -4388,7 +4407,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -4448,9 +4467,9 @@ name = "ocrmac" version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "click" }, - { name = "pillow" }, - { name = "pyobjc-framework-vision" }, + { name = "click", marker = "sys_platform == 'darwin'" }, + { name = "pillow", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-vision", marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/dd/dc/de3e9635774b97d9766f6815bbb3f5ec9bce347115f10d9abbf2733a9316/ocrmac-1.0.0.tar.gz", hash = "sha256:5b299e9030c973d1f60f82db000d6c2e5ff271601878c7db0885e850597d1d2e", size = 1463997, upload-time = "2024-11-07T12:00:00.197Z" } wheels = [ @@ -6050,7 +6069,7 @@ name = "pyobjc-framework-cocoa" version = "12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pyobjc-core" }, + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/02/a3/16ca9a15e77c061a9250afbae2eae26f2e1579eb8ca9462ae2d2c71e1169/pyobjc_framework_cocoa-12.1.tar.gz", hash = "sha256:5556c87db95711b985d5efdaaf01c917ddd41d148b1e52a0c66b1a2e2c5c1640", size = 2772191, upload-time = "2025-11-14T10:13:02.069Z" } wheels = [ @@ -6066,8 +6085,8 @@ name = "pyobjc-framework-coreml" version = "12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pyobjc-core" }, - { name = "pyobjc-framework-cocoa" }, + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/30/2d/baa9ea02cbb1c200683cb7273b69b4bee5070e86f2060b77e6a27c2a9d7e/pyobjc_framework_coreml-12.1.tar.gz", hash = "sha256:0d1a4216891a18775c9e0170d908714c18e4f53f9dc79fb0f5263b2aa81609ba", size = 40465, upload-time = "2025-11-14T10:14:02.265Z" } wheels = [ @@ -6083,8 +6102,8 @@ name = "pyobjc-framework-quartz" version = "12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pyobjc-core" }, - { name = "pyobjc-framework-cocoa" }, + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/94/18/cc59f3d4355c9456fc945eae7fe8797003c4da99212dd531ad1b0de8a0c6/pyobjc_framework_quartz-12.1.tar.gz", hash = "sha256:27f782f3513ac88ec9b6c82d9767eef95a5cf4175ce88a1e5a65875fee799608", size = 3159099, upload-time = "2025-11-14T10:21:24.31Z" } wheels = [ @@ -6100,10 +6119,10 @@ name = "pyobjc-framework-vision" version = "12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pyobjc-core" }, - { name = "pyobjc-framework-cocoa" }, - { name = "pyobjc-framework-coreml" }, - { name = "pyobjc-framework-quartz" }, + { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, + { name = 
"pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-coreml", marker = "sys_platform == 'darwin'" }, + { name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c2/5a/08bb3e278f870443d226c141af14205ff41c0274da1e053b72b11dfc9fb2/pyobjc_framework_vision-12.1.tar.gz", hash = "sha256:a30959100e85dcede3a786c544e621ad6eb65ff6abf85721f805822b8c5fe9b0", size = 59538, upload-time = "2025-11-14T10:23:21.979Z" } wheels = [ From a3e3e4f65d6f3b0087d0e4f18a2fac6eaf657abd Mon Sep 17 00:00:00 2001 From: EwanTauran Date: Mon, 1 Dec 2025 17:31:06 -0800 Subject: [PATCH 4/4] feat: enhance Airweave tools with updated module imports and namespace management - Improved import handling in AirweaveAdvancedSearchTool and AirweaveSearchTool to ensure proper module-level variable updates after installation. - Updated namespace references to maintain functionality and avoid scope issues during execution. - Enhanced error handling for airweave-sdk installation failures. 
--- .../airweave_advanced_search_tool.py | 19 +++++++++++++++++-- .../airweave_tool/airweave_search_tool.py | 10 ++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py index 04c437150c..646a5439e9 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_advanced_search_tool.py @@ -133,8 +133,23 @@ def __init__(self, **kwargs: Any) -> None: import subprocess try: subprocess.run(["uv", "add", "airweave-sdk"], check=True) # noqa: S607 - # Import after installation - from airweave import AirweaveSDK, AsyncAirweaveSDK + # Import after installation and update module-level variables + from airweave import ( + AirweaveSDK as _AirweaveSDK, + AsyncAirweaveSDK as _AsyncAirweaveSDK, + Filter as _Filter, + FieldCondition as _FieldCondition, + MatchValue as _MatchValue + ) + + # Update module namespace to ensure later references work + import crewai_tools.tools.airweave_tool.airweave_advanced_search_tool as airweave_module + airweave_module.AirweaveSDK = _AirweaveSDK + airweave_module.AsyncAirweaveSDK = _AsyncAirweaveSDK + airweave_module.Filter = _Filter + airweave_module.FieldCondition = _FieldCondition + airweave_module.MatchValue = _MatchValue + airweave_module.AIRWEAVE_AVAILABLE = True except subprocess.CalledProcessError as e: raise ImportError("Failed to install airweave-sdk package") from e else: diff --git a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py index a8afd7eaf1..164b8182a2 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/airweave_tool/airweave_search_tool.py @@ 
-112,8 +112,14 @@ def __init__(self, **kwargs: Any) -> None: import subprocess try: subprocess.run(["uv", "add", "airweave-sdk"], check=True) # noqa: S607 - # Import after installation - from airweave import AirweaveSDK, AsyncAirweaveSDK + # Import after installation and update module-level variables + from airweave import AirweaveSDK as _AirweaveSDK, AsyncAirweaveSDK as _AsyncAirweaveSDK + + # Update module namespace to ensure later references work + import crewai_tools.tools.airweave_tool.airweave_search_tool as airweave_module + airweave_module.AirweaveSDK = _AirweaveSDK + airweave_module.AsyncAirweaveSDK = _AsyncAirweaveSDK + airweave_module.AIRWEAVE_AVAILABLE = True except subprocess.CalledProcessError as e: raise ImportError("Failed to install airweave-sdk package") from e else: