From 746b1a0ec47f8a98cd13ad2ead8b9d108e2583c7 Mon Sep 17 00:00:00 2001 From: Matthew Podwysocki Date: Fri, 19 Dec 2025 12:19:21 -0500 Subject: [PATCH] =?UTF-8?q?Revert=20"[tools]=20Enhance=20tool=20descriptio?= =?UTF-8?q?ns=20for=20better=20semantic=20matching=20and=20RA=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit ff99f6d1ef71727148bb2103f1ca415cf7dd659c. --- .../CategorySearchTool.ts | 22 +- src/tools/directions-tool/DirectionsTool.ts | 27 +- src/tools/isochrone-tool/IsochroneTool.ts | 13 +- src/tools/matrix-tool/MatrixTool.ts | 25 +- .../ReverseGeocodeTool.ts | 26 +- .../SearchAndGeocodeTool.ts | 21 +- .../StaticMapImageTool.ts | 29 +- test/tools/README.md | 94 ----- test/tools/description-baseline.test.ts | 292 -------------- test/tools/description-quality.test.ts | 196 ---------- test/tools/semantic-tool-selection.test.ts | 362 ------------------ 11 files changed, 14 insertions(+), 1093 deletions(-) delete mode 100644 test/tools/README.md delete mode 100644 test/tools/description-baseline.test.ts delete mode 100644 test/tools/description-quality.test.ts delete mode 100644 test/tools/semantic-tool-selection.test.ts diff --git a/src/tools/category-search-tool/CategorySearchTool.ts b/src/tools/category-search-tool/CategorySearchTool.ts index 7a27011..5de8060 100644 --- a/src/tools/category-search-tool/CategorySearchTool.ts +++ b/src/tools/category-search-tool/CategorySearchTool.ts @@ -19,26 +19,8 @@ export class CategorySearchTool extends MapboxApiBasedTool< typeof CategorySearchResponseSchema > { name = 'category_search_tool'; - description = `Search for points of interest by category or type (restaurants, gas stations, hotels, ATMs, parking, coffee shops, pharmacies, museums, hospitals, etc.). Returns a list of nearby places matching the category with coordinates, names, addresses, and details. - - Use this when: - - User asks for a type or category of place (plural/generic): "Where are the restaurants nearby?", "Show me all coffee shops" - - Browsing options: "What hotels are available downtown?", "Find gas stations along this route" - - Discovery queries: "What's around here?", "Show me all ATMs in this neighborhood" - - Generic searches with keywords like 'any', 'all', 'nearby', 'around', 'in this area' - - Common use cases: - - Find all POIs by type: "Show me all pharmacies within 5 miles" - - Browse options: "What restaurants are near Times Square?" - - Amenity search: "Find EV charging stations nearby" - - Service discovery: "Where are the nearest hospitals?" - - Shopping: "Show me all grocery stores in downtown Seattle" - - Difference from search_and_geocode_tool: - - Use category_search_tool for types/categories (e.g., "all restaurants", "coffee shops") - - Use search_and_geocode_tool for specific names/brands (e.g., "Starbucks on 5th Ave", "Empire State Building") - - Supports both JSON and text output formats.`; + description = + "Return all places that match a category (industry, amenity, or NAICS‑style code). Use when the user asks for a type of place, plural or generic terms like 'museums', 'coffee shops', 'electric‑vehicle chargers', or when the query includes is‑a phrases such as 'any', 'all', 'nearby'. Do not use when a unique name or brand is provided. Supports both JSON and text output formats."; annotations = { title: 'Category Search Tool', readOnlyHint: true, diff --git a/src/tools/directions-tool/DirectionsTool.ts b/src/tools/directions-tool/DirectionsTool.ts index ef3d1a5..c5f1c5d 100644 --- a/src/tools/directions-tool/DirectionsTool.ts +++ b/src/tools/directions-tool/DirectionsTool.ts @@ -21,31 +21,8 @@ export class DirectionsTool extends MapboxApiBasedTool< typeof DirectionsResponseSchema > { name = 'directions_tool'; - description = `Calculate optimal routes and turn-by-turn directions (navigation) between two or more locations. Returns detailed driving, walking, or cycling instructions with route geometry (path), distance, duration, and estimated travel time (ETA). - - Supports multiple routing modes: - - Driving routes with real-time traffic (car navigation, auto routing) - - Walking directions (pedestrian routes, on foot) - - Cycling routes (bike paths, bicycle-friendly roads) - - Multiple waypoints (road trip planning, multi-stop routes) - - Common use cases: - - Get driving directions from point A to B: "How do I drive from LAX to Hollywood?" - - Calculate travel time and distance: "How long to walk from Central Park to Times Square?" - - Plan multi-stop routes: "Route from hotel to museum to restaurant to hotel" - - Compare route options: "Show me the fastest route avoiding tolls" - - Navigation with traffic: "Driving directions with current traffic conditions" - - Returns: - - Turn-by-turn instructions - - Route geometry (GeoJSON path for mapping) - - Total distance and duration - - Step-by-step maneuvers - - Related tools: - - Use matrix_tool for travel times between many locations - - Use isochrone_tool to see areas reachable within a time limit - - Use search_and_geocode_tool to convert addresses to coordinates first`; + description = + 'Fetches directions from Mapbox API based on provided coordinates and direction method.'; annotations = { title: 'Directions Tool', readOnlyHint: true, diff --git a/src/tools/isochrone-tool/IsochroneTool.ts b/src/tools/isochrone-tool/IsochroneTool.ts index 1536028..5f88ce7 100644 --- a/src/tools/isochrone-tool/IsochroneTool.ts +++ b/src/tools/isochrone-tool/IsochroneTool.ts @@ -16,20 +16,11 @@ export class IsochroneTool extends MapboxApiBasedTool< typeof IsochroneResponseSchema > { name = 'isochrone_tool'; - description = `Computes areas that are reachable within a specified amount of time or distance from a location (reachability analysis, coverage area, service area). Returns the reachable regions as contours of Polygons or LineStrings in GeoJSON format that you can display on a map. - + description = `Computes areas that are reachable within a specified amount of time from a location, and returns the reachable regions as contours of Polygons or LineStrings in GeoJSON format that you can display on a map. Common use cases: - Show a user how far they can travel in X minutes from their current location - Determine whether a destination is within a certain travel time threshold - - Compare travel ranges for different modes of transportation (driving, walking, cycling) - - Visualize service coverage areas for businesses or facilities - - Analyze accessibility and reachability for logistics planning - - Find all areas within 30-minute drive time for delivery zones - - Returns: - - GeoJSON contours showing reachable areas - - Multiple time/distance bands (e.g., 10, 20, 30 minutes) - - Visualizable polygons for mapping coverage`; + - Compare travel ranges for different modes of transportation'`; annotations = { title: 'Isochrone Tool', readOnlyHint: true, diff --git a/src/tools/matrix-tool/MatrixTool.ts b/src/tools/matrix-tool/MatrixTool.ts index 576f5e2..37945bf 100644 --- a/src/tools/matrix-tool/MatrixTool.ts +++ b/src/tools/matrix-tool/MatrixTool.ts @@ -19,29 +19,8 @@ export class MatrixTool extends MapboxApiBasedTool< typeof MatrixResponseSchema > { name = 'matrix_tool'; - description = `Calculate travel times and distances between multiple origin and destination points (one-to-many, many-to-one, or many-to-many routing). Returns a distance/duration matrix showing travel time (ETA) and distance from each origin to each destination. - - Common use cases: - - Logistics and route optimization: "Calculate travel times from warehouse to 10 delivery addresses" - - Find nearest location: "Which store location is closest to this customer?" - - Delivery time estimates: "How long to deliver from restaurant to multiple addresses?" - - Service area analysis: "Calculate distances from office to all branch locations" - - Multi-point comparison: "Compare travel times between 5 hotels and 3 attractions" - - Supports: - - One-to-many: Single origin to multiple destinations - - Many-to-one: Multiple origins to single destination - - Many-to-many: Multiple origins to multiple destinations - - Different travel modes: driving (with traffic), walking, cycling - - Returns: - - Matrix of travel times (durations) between all point pairs - - Matrix of distances between all point pairs - - Efficient bulk calculations for route optimization - - Related tools: - - Use directions_tool for turn-by-turn directions for a single route - - Use isochrone_tool to visualize all areas reachable within a time limit`; + description = + 'Calculates travel times and distances between multiple points using Mapbox Matrix API.'; annotations = { title: 'Matrix Tool', readOnlyHint: true, diff --git a/src/tools/reverse-geocode-tool/ReverseGeocodeTool.ts b/src/tools/reverse-geocode-tool/ReverseGeocodeTool.ts index 1adf81b..029415d 100644 --- a/src/tools/reverse-geocode-tool/ReverseGeocodeTool.ts +++ b/src/tools/reverse-geocode-tool/ReverseGeocodeTool.ts @@ -19,30 +19,8 @@ export class ReverseGeocodeTool extends MapboxApiBasedTool< typeof GeocodingResponseSchema > { name = 'reverse_geocode_tool'; - description = `Convert geographic coordinates (longitude, latitude) into human-readable addresses or place names (reverse geocoding). Returns addresses, cities, towns, neighborhoods, postal codes (zip codes), districts, regions, and countries for a specific coordinate pair. - - Common use cases: - - "What address is at these coordinates?" - Get street address from GPS location - - "Where am I?" - Convert device location to readable address - - "What city is this?" - Identify city/town from coordinates - - "Get postal code for location" - Find zip code or postal code - - "Reverse geocode map click" - Display address when user clicks on map - - Returns information about: - - Street addresses (house number and street name) - - Neighborhoods and districts - - Cities, towns, and villages - - Postal codes and zip codes - - States, provinces, and regions - - Countries - - Note: Use limit=1 for best results (most relevant match). This tool cannot reverse geocode businesses, landmarks, historic sites, and other points of interest - it only returns administrative locations and addresses. - - Related tools: - - Use search_and_geocode_tool for the opposite: convert addresses to coordinates (forward geocoding) - - Use search_and_geocode_tool to find businesses or POIs by name - - Supports both JSON and text output formats.`; + description = + 'Find addresses, cities, towns, neighborhoods, postcodes, districts, regions, and countries around a specified geographic coordinate pair. Converts geographic coordinates (longitude, latitude) into human-readable addresses or place names. Use limit=1 for best results. This tool cannot reverse geocode businesses, landmarks, historic sites, and other points of interest that are not of the types mentioned. Supports both JSON and text output formats.'; annotations = { title: 'Reverse Geocode Tool', readOnlyHint: true, diff --git a/src/tools/search-and-geocode-tool/SearchAndGeocodeTool.ts b/src/tools/search-and-geocode-tool/SearchAndGeocodeTool.ts index b7a5cd7..3e50998 100644 --- a/src/tools/search-and-geocode-tool/SearchAndGeocodeTool.ts +++ b/src/tools/search-and-geocode-tool/SearchAndGeocodeTool.ts @@ -22,25 +22,8 @@ export class SearchAndGeocodeTool extends MapboxApiBasedTool< typeof SearchBoxResponseSchema > { name = 'search_and_geocode_tool'; - description = `Search for specific points of interest (POIs), businesses, brands, landmarks, and convert addresses or place names to coordinates (geocoding). Returns detailed location information including coordinates (latitude/longitude), addresses, and place metadata. - - Use this when: - - User specifies a particular place name, brand, or business (e.g., "Starbucks on 5th Avenue", "Empire State Building") - - Converting addresses to coordinates for mapping or routing - - Looking up specific landmarks, buildings, or locations by name - - Need precise location details for a named place - - Common use cases: - - Geocode an address: "123 Main Street, Seattle, WA" → coordinates - - Find a specific business: "Find the nearest Tesla showroom" - - Locate landmarks: "Where is the Statue of Liberty?" - - Get coordinates for routing: "Get directions from Central Park to Times Square" - - Related tools: - - Use category_search_tool for browsing all places by type (e.g., "all restaurants nearby", "show me gas stations") - - Use reverse_geocode_tool to convert coordinates back to addresses - - Note: Do not use for generic place types like 'museums', 'coffee shops', 'tacos' - category_search_tool is better for that. Setting a proximity point is strongly encouraged for more local results.`; + description = + "Search for POIs, brands, chains, geocode cities, towns, addresses. Do not use for generic place types such as 'museums', 'coffee shops', 'tacos', etc, because category_search_tool is better for that. Setting a proximity point is strongly encouraged for more local results."; annotations = { title: 'Search and Geocode Tool', readOnlyHint: true, diff --git a/src/tools/static-map-image-tool/StaticMapImageTool.ts b/src/tools/static-map-image-tool/StaticMapImageTool.ts index 438298b..d203ef1 100644 --- a/src/tools/static-map-image-tool/StaticMapImageTool.ts +++ b/src/tools/static-map-image-tool/StaticMapImageTool.ts @@ -14,33 +14,8 @@ export class StaticMapImageTool extends MapboxApiBasedTool< typeof StaticMapImageInputSchema > { name = 'static_map_image_tool'; - description = `Generate a static map image URL from Mapbox Static Images API. Creates a snapshot/thumbnail of a map location with optional markers, paths, and overlays. Returns a direct URL to the image (PNG or JPEG format), not an embedded image. - - Common use cases: - - Create shareable map snapshots for reports or documentation - - Generate thumbnail previews of locations for listings or search results - - Embed map images in emails, PDFs, or presentations - - Show route overview as static image - - Create before/after comparison maps - - Display location context in non-interactive formats - - Supports: - - Custom center coordinates and zoom level (0-22) - - Image dimensions up to 1280x1280 pixels - - Multiple map styles (streets, satellite, outdoors, dark, light, etc.) - - Markers with custom colors and labels - - Paths and polylines (routes, boundaries) - - GeoJSON overlays for complex shapes - - Output format: - - Returns direct URL string to image file - - PNG format for vector styles - - JPEG format for raster/satellite styles - - URL can be embedded in HTML, shared, or downloaded - - Related tools: - - Use directions_tool to get route geometry to display on static map - - Use search_and_geocode_tool to get coordinates for map center`; + description = + 'Generates a static map image from Mapbox Static Images API. Supports center coordinates, zoom level (0-22), image size (up to 1280x1280), various Mapbox styles, and overlays (markers, paths, GeoJSON). Returns PNG for vector styles, JPEG for raster-only styles.'; annotations = { title: 'Static Map Image Tool', readOnlyHint: true, diff --git a/test/tools/README.md b/test/tools/README.md deleted file mode 100644 index 23d0740..0000000 --- a/test/tools/README.md +++ /dev/null @@ -1,94 +0,0 @@ -# Tool Description Testing - -This directory contains tests for validating tool description quality and semantic matching capabilities. - -## Test Suites - -### 1. Description Quality Tests (`description-quality.test.ts`) - -Validates that tool descriptions meet quality standards: - -- ✅ Minimum length (>200 characters) -- ✅ Includes use cases and examples -- ✅ Contains relevant keywords for semantic matching -- ✅ Cross-references related tools -- ✅ Follows consistent structure - -**Run:** `npm test -- test/tools/description-quality.test.ts` - -### 2. Description Baseline Tests (`description-baseline.test.ts`) - -Prevents regression of description quality over time: - -- ✅ Maintains minimum word/phrase counts per tool -- ✅ Preserves semantic richness (vocabulary diversity) -- ✅ Ensures domain-specific terminology -- ✅ Validates consistent structure patterns - -**Run:** `npm test -- test/tools/description-baseline.test.ts` - -### 3. Semantic Tool Selection Tests (`semantic-tool-selection.test.ts`) - -**⚠️ Requires OpenAI API Key** - -Validates that tool descriptions work correctly with RAG-based semantic matching using OpenAI embeddings (text-embedding-3-small model). - -Tests query-to-tool matching: - -- ✅ "find coffee shops nearby" → `category_search_tool` -- ✅ "where is Starbucks" → `search_and_geocode_tool` -- ✅ "driving directions" → `directions_tool` -- ✅ "areas reachable in 30 minutes" → `isochrone_tool` -- ✅ Category vs specific place disambiguation -- ✅ Semantic similarity thresholds (>0.5 for relevant tools) - -#### Running Semantic Tests - -**Local Development:** - -```bash -export OPENAI_API_KEY="your-key-here" -npm test -- test/tools/semantic-tool-selection.test.ts -``` - -**CI/CD:** -Set `OPENAI_API_KEY` as a GitHub secret and tests will run automatically. - -**Without API Key:** -Tests are automatically skipped if `OPENAI_API_KEY` is not set. - -## Test Philosophy - -These tests align with our RAG optimization goals: - -1. **Quality Tests** - Maintain description standards -2. **Baseline Tests** - Prevent regressions over time -3. **Semantic Tests** - Validate actual tool selection performance - -The semantic tests are the **core validation** that descriptions work as intended for RAG-based tool selection, while quality/baseline tests ensure consistency. - -## Expected Results - -After RAG-optimized descriptions (PR #78): - -- Average description length: ~1,260 characters -- Vocabulary diversity: 44-52% unique words -- Semantic similarity for relevant queries: >0.5 - -## Updating Baselines - -If you intentionally improve descriptions beyond current baselines, update the thresholds in `description-baseline.test.ts`: - -```typescript -const baselines: Record< - string, - { minLength: number; minWords: number; minPhrases: number } -> = { - search_and_geocode_tool: { - minLength: 800, // Update if improved - minWords: 120, - minPhrases: 15 - } - // ... -}; -``` diff --git a/test/tools/description-baseline.test.ts b/test/tools/description-baseline.test.ts deleted file mode 100644 index a8082f6..0000000 --- a/test/tools/description-baseline.test.ts +++ /dev/null @@ -1,292 +0,0 @@ -// Copyright (c) Mapbox, Inc. -// Licensed under the MIT License. - -/** - * Description Baseline Metrics - * - * This test captures baseline metrics for tool descriptions to prevent - * regressions over time. If descriptions get shorter or lose quality, - * these tests will catch it. - * - * Baseline established after implementing RAG-optimized descriptions. - */ - -import { describe, test, expect } from 'vitest'; -import { getAllTools } from '../../src/tools/toolRegistry.js'; - -interface DescriptionMetrics { - name: string; - length: number; - hasUseCases: boolean; - hasRelatedTools: boolean; - hasExamples: boolean; - phraseCount: number; - wordCount: number; -} - -function analyzeDescription( - toolName: string, - description: string -): DescriptionMetrics { - const desc = description.toLowerCase(); - - return { - name: toolName, - length: description.length, - hasUseCases: - desc.includes('common use cases:') || - desc.includes('use this when:') || - desc.includes('use cases:'), - hasRelatedTools: desc.includes('related tools:'), - hasExamples: - desc.includes('"') || - desc.includes('e.g.') || - desc.includes('such as') || - /:\s+-\s+/.test(desc), - phraseCount: description - .split(/[.,;:\n-]/) - .filter((p) => p.trim().length > 10).length, - wordCount: description.split(/\s+/).length - }; -} - -describe('Description Baseline Metrics', () => { - const tools = getAllTools(); - const mapboxApiTools = tools.filter( - (t) => - t.name !== 'version_tool' && - t.name !== 'resource_reader_tool' && - t.name !== 'category_list_tool' // Deprecated tool - ); - - describe('Overall quality baseline', () => { - test('all Mapbox API tools meet minimum quality thresholds', () => { - const metrics = mapboxApiTools.map((tool) => - analyzeDescription(tool.name, tool.description) - ); - - metrics.forEach((m) => { - // All tools should have substantial descriptions - expect(m.length).toBeGreaterThan(200); - expect(m.wordCount).toBeGreaterThan(30); - expect(m.phraseCount).toBeGreaterThan(5); - - // All tools should have use cases - expect(m.hasUseCases).toBe(true); - - // All tools should have examples - expect(m.hasExamples).toBe(true); - }); - }); - - test('average description length is maintained', () => { - const totalLength = mapboxApiTools.reduce( - (sum, tool) => sum + tool.description.length, - 0 - ); - const avgLength = totalLength / mapboxApiTools.length; - - // Average should be at least 500 characters (allows for some shorter tools) - expect(avgLength).toBeGreaterThan(500); - - console.log(`Average description length: ${avgLength.toFixed(0)} chars`); - }); - }); - - describe('Individual tool baselines', () => { - // Baseline metrics established 2025-12-16 after RAG optimization - const baselines: Record< - string, - { minLength: number; minWords: number; minPhrases: number } - > = { - search_and_geocode_tool: { - minLength: 800, - minWords: 120, - minPhrases: 15 - }, - directions_tool: { - minLength: 900, - minWords: 130, - minPhrases: 18 - }, - category_search_tool: { - minLength: 700, - minWords: 110, - minPhrases: 15 - }, - isochrone_tool: { - minLength: 550, - minWords: 80, - minPhrases: 10 - }, - matrix_tool: { - minLength: 850, - minWords: 120, - minPhrases: 16 - }, - reverse_geocode_tool: { - minLength: 750, - minWords: 110, - minPhrases: 14 - }, - static_map_image_tool: { - minLength: 900, - minWords: 130, - minPhrases: 18 - } - }; - - test.each(Object.entries(baselines))( - '%s maintains baseline metrics', - (toolName, baseline) => { - const tool = tools.find((t) => t.name === toolName); - expect(tool).toBeDefined(); - - const metrics = analyzeDescription(tool!.name, tool!.description); - - // Check against baselines - expect(metrics.length).toBeGreaterThanOrEqual(baseline.minLength); - expect(metrics.wordCount).toBeGreaterThanOrEqual(baseline.minWords); - expect(metrics.phraseCount).toBeGreaterThanOrEqual(baseline.minPhrases); - - // Log actual metrics - console.log(`${toolName}:`, { - length: metrics.length, - words: metrics.wordCount, - phrases: metrics.phraseCount - }); - } - ); - }); - - describe('Semantic richness baseline', () => { - test('descriptions contain diverse vocabulary', () => { - mapboxApiTools.forEach((tool) => { - const words = tool.description.toLowerCase().split(/\s+/); - const uniqueWords = new Set( - words.filter((w) => w.length > 4) // Filter out short words - ); - - // Should have good vocabulary diversity (at least 40% unique meaningful words) - const diversityRatio = uniqueWords.size / words.length; - expect(diversityRatio).toBeGreaterThan(0.4); - - console.log( - `${tool.name}: ${uniqueWords.size} unique words / ${words.length} total = ${(diversityRatio * 100).toFixed(1)}%` - ); - }); - }); - - test('descriptions include domain-specific terminology', () => { - const domainTerms = [ - 'coordinates', - 'latitude', - 'longitude', - 'geocod', - 'route', - 'navigation', - 'location', - 'address', - 'poi', - 'place', - 'distance', - 'travel', - 'time', - 'map', - 'geojson' - ]; - - mapboxApiTools.forEach((tool) => { - const desc = tool.description.toLowerCase(); - const termsFound = domainTerms.filter((term) => desc.includes(term)); - - // Each tool should use at least 3 domain terms - expect(termsFound.length).toBeGreaterThanOrEqual(3); - }); - }); - }); - - describe('Structure baseline', () => { - test('descriptions follow consistent pattern', () => { - const pattern = { - hasPrimaryFunction: true, // First sentence describes main function - hasReturnsInfo: true, // Mentions what it returns - hasUseCases: true, // Includes use case examples - hasRelatedTools: false // Optional but recommended - }; - - mapboxApiTools.forEach((tool) => { - const metrics = analyzeDescription(tool.name, tool.description); - - expect(metrics.hasUseCases).toBe(pattern.hasUseCases); - - // Should mention what it returns - const desc = tool.description.toLowerCase(); - const hasReturns = - desc.includes('returns') || - desc.includes('output') || - desc.includes('provides'); - expect(hasReturns).toBe(pattern.hasReturnsInfo); - }); - }); - - test('no description has regressed to single sentence', () => { - mapboxApiTools.forEach((tool) => { - // Count sentences (rough heuristic) - const sentences = tool.description - .split(/[.!?]/) - .filter((s) => s.trim().length > 20); - - // Should have multiple sentences/sections - expect(sentences.length).toBeGreaterThan(3); - }); - }); - }); - - describe('Cross-references baseline', () => { - test('related tools reference each other', () => { - const crossReferences = [ - { - tool: 'search_and_geocode_tool', - shouldMention: ['category_search_tool'] - }, - { - tool: 'category_search_tool', - shouldMention: ['search_and_geocode_tool'] - }, - { - tool: 'reverse_geocode_tool', - shouldMention: ['search_and_geocode_tool'] - }, - { - tool: 'directions_tool', - shouldMention: ['matrix_tool', 'isochrone_tool'], - atLeastOne: true - }, - { - tool: 'matrix_tool', - shouldMention: ['directions_tool', 'isochrone_tool'], - atLeastOne: true - } - ]; - - crossReferences.forEach(({ tool, shouldMention, atLeastOne = false }) => { - const toolObj = tools.find((t) => t.name === tool); - expect(toolObj).toBeDefined(); - - const desc = toolObj!.description.toLowerCase(); - - if (atLeastOne) { - // At least one reference should be present - const hasReference = shouldMention.some((ref) => desc.includes(ref)); - expect(hasReference).toBe(true); - } else { - // All references should be present - shouldMention.forEach((ref) => { - expect(desc).toContain(ref); - }); - } - }); - }); - }); -}); diff --git a/test/tools/description-quality.test.ts b/test/tools/description-quality.test.ts deleted file mode 100644 index 62d1f0b..0000000 --- a/test/tools/description-quality.test.ts +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) Mapbox, Inc. -// Licensed under the MIT License. - -import { describe, test, expect } from 'vitest'; -import { getAllTools } from '../../src/tools/toolRegistry.js'; - -describe('Tool Description Quality Standards', () => { - const tools = getAllTools(); - const mapboxApiTools = tools.filter( - (t) => - t.name !== 'version_tool' && - t.name !== 'resource_reader_tool' && - t.name !== 'category_list_tool' // Deprecated tool - ); - - describe('Minimum quality thresholds', () => { - test.each(mapboxApiTools)( - '$name has comprehensive description (>200 chars)', - (tool) => { - expect(tool.description.length).toBeGreaterThan(200); - } - ); - - test.each(mapboxApiTools)( - '$name includes use cases or "when to use" guidance', - (tool) => { - const desc = tool.description.toLowerCase(); - const hasUseCases = - desc.includes('common use cases:') || - desc.includes('use this when:') || - desc.includes('use cases:'); - - expect(hasUseCases).toBe(true); - } - ); - - test.each(mapboxApiTools)( - '$name includes context about what it returns', - (tool) => { - const desc = tool.description.toLowerCase(); - const hasReturns = - desc.includes('returns') || - desc.includes('output') || - desc.includes('provides'); - - expect(hasReturns).toBe(true); - } - ); - }); - - describe('Semantic richness for RAG', () => { - test.each(mapboxApiTools)( - '$name has multiple semantic concepts (>5 phrases)', - (tool) => { - // Count phrases by splitting on common separators - const phrases = tool.description - .split(/[.,;:\n-]/) - .filter((p) => p.trim().length > 10); - expect(phrases.length).toBeGreaterThan(5); - } - ); - - test.each(mapboxApiTools)( - '$name includes examples or specific scenarios', - (tool) => { - const desc = tool.description.toLowerCase(); - // Should have quotes (examples) or specific terminology - const hasExamples = - desc.includes('"') || - desc.includes('e.g.') || - desc.includes('such as') || - desc.includes('for example') || - /:\s+-\s+/.test(desc); // Bulleted list pattern - - expect(hasExamples).toBe(true); - } - ); - }); - - describe('Tool-specific keywords', () => { - test('search_and_geocode_tool includes geocoding keywords', () => { - const tool = tools.find((t) => t.name === 'search_and_geocode_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include key geocoding terms - expect(desc).toMatch(/geocod|coordinate|latitude|longitude|address/); - }); - - test('directions_tool includes routing/navigation keywords', () => { - const tool = tools.find((t) => t.name === 'directions_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include key routing terms - expect(desc).toMatch( - /route|routing|navigation|directions|turn-by-turn|eta/ - ); - }); - - test('category_search_tool includes category/type keywords', () => { - const tool = tools.find((t) => t.name === 'category_search_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include category examples - expect(desc).toMatch( - /restaurant|hotel|gas station|coffee|pharmacy|hospital/ - ); - }); - - test('isochrone_tool includes reachability keywords', () => { - const tool = tools.find((t) => t.name === 'isochrone_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include reachability concepts - expect(desc).toMatch(/reachab|coverage|service area|accessible/); - }); - - test('matrix_tool includes optimization keywords', () => { - const tool = tools.find((t) => t.name === 'matrix_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include logistics/optimization terms - expect(desc).toMatch( - /matrix|many-to-many|logistics|optimization|multiple/ - ); - }); - - test('reverse_geocode_tool includes address lookup keywords', () => { - const tool = tools.find((t) => t.name === 'reverse_geocode_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include reverse geocoding concepts - expect(desc).toMatch( - /reverse|coordinates to address|postal code|zip code/ - ); - }); - - test('static_map_image_tool includes visualization keywords', () => { - const tool = tools.find((t) => t.name === 'static_map_image_tool'); - const desc = tool!.description.toLowerCase(); - - // Should include static image concepts - expect(desc).toMatch(/static|image|snapshot|thumbnail|url|visual/); - }); - }); - - describe('Related tools guidance', () => { - test('search tools reference each other appropriately', () => { - const searchTool = tools.find( - (t) => t.name === 'search_and_geocode_tool' - ); - const categoryTool = tools.find((t) => t.name === 'category_search_tool'); - - // search_and_geocode_tool should mention category_search_tool - expect(searchTool!.description.toLowerCase()).toContain( - 'category_search' - ); - - // category_search_tool should mention search_and_geocode_tool - expect(categoryTool!.description.toLowerCase()).toContain( - 'search_and_geocode' - ); - }); - - test('geocoding tools reference each other', () => { - const searchTool = tools.find( - (t) => t.name === 'search_and_geocode_tool' - ); - const reverseTool = tools.find((t) => t.name === 'reverse_geocode_tool'); - - // Should cross-reference for forward/reverse geocoding - expect(reverseTool!.description.toLowerCase()).toContain( - 'search_and_geocode' - ); - }); - - test('routing tools reference each other', () => { - const directionsTool = tools.find((t) => t.name === 'directions_tool'); - const matrixTool = tools.find((t) => t.name === 'matrix_tool'); - const isochroneTool = tools.find((t) => t.name === 'isochrone_tool'); - - const directionsDesc = directionsTool!.description.toLowerCase(); - const matrixDesc = matrixTool!.description.toLowerCase(); - - // Directions should reference matrix or isochrone - expect( - directionsDesc.includes('matrix') || - directionsDesc.includes('isochrone') - ).toBe(true); - - // Matrix should reference directions or isochrone - expect( - matrixDesc.includes('directions') || matrixDesc.includes('isochrone') - ).toBe(true); - }); - }); -}); diff --git a/test/tools/semantic-tool-selection.test.ts b/test/tools/semantic-tool-selection.test.ts deleted file mode 100644 index 186699e..0000000 --- a/test/tools/semantic-tool-selection.test.ts +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright (c) Mapbox, Inc. -// Licensed under the MIT License. - -/** - * Semantic Tool Selection Tests - * - * These tests validate that tool descriptions are optimized for RAG-based - * semantic matching using OpenAI embeddings (text-embedding-3-small). - * - * Tests only run when OPENAI_API_KEY environment variable is set. - * This allows: - * - Local testing during development - * - CI testing with secrets - * - Skipping in environments without API access - */ - -import { describe, test, expect, beforeAll } from 'vitest'; -import { getAllTools } from '../../src/tools/toolRegistry.js'; - -// Skip all tests if no OpenAI API key -const hasApiKey = !!process.env.OPENAI_API_KEY; -const describeIfApiKey = hasApiKey ? describe : describe.skip; - -interface EmbeddingResponse { - data: Array<{ - embedding: number[]; - }>; -} - -/** - * Compute cosine similarity between two embedding vectors - */ -function cosineSimilarity(a: number[], b: number[]): number { - if (a.length !== b.length) { - throw new Error('Vectors must have same length'); - } - - let dotProduct = 0; - let normA = 0; - let normB = 0; - - for (let i = 0; i < a.length; i++) { - dotProduct += a[i] * b[i]; - normA += a[i] * a[i]; - normB += b[i] * b[i]; - } - - return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); -} - -/** - * Get embedding for text using OpenAI API - */ -async function getEmbedding(text: string): Promise { - const response = await fetch('https://api.openai.com/v1/embeddings', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${process.env.OPENAI_API_KEY}` - }, - body: JSON.stringify({ - model: 'text-embedding-3-small', - input: text - }) - }); - - if (!response.ok) { - throw new Error(`OpenAI API error: ${response.statusText}`); - } - - const data = (await response.json()) as EmbeddingResponse; - return data.data[0].embedding; -} - -/** - * Compute semantic similarity between a query and tool description - */ -async function computeToolSimilarity( - query: string, - toolName: string, - toolDescription: string -): Promise { - // Mimic how RAG selector embeds tools - const toolText = `${toolName}\n${toolDescription}`; - - const [queryEmbedding, toolEmbedding] = await Promise.all([ - getEmbedding(query), - getEmbedding(toolText) - ]); - - return cosineSimilarity(queryEmbedding, toolEmbedding); -} - -/** - * Find top-k tools most similar to query - */ -async function findTopTools( - query: string, - k: number = 3 -): Promise> { - const tools = getAllTools(); - - const scores = await Promise.all( - tools.map(async (tool) => { - const score = await computeToolSimilarity( - query, - tool.name, - tool.description - ); - return { name: tool.name, score }; - }) - ); - - // Sort by similarity (highest first) - scores.sort((a, b) => b.score - a.score); - - return scores.slice(0, k); -} - -describeIfApiKey('Semantic Tool Selection', () => { - // Add timeout for API calls - const apiTimeout = 30000; - - beforeAll(() => { - if (!hasApiKey) { - console.log( - '\nSkipping semantic tool selection tests: OPENAI_API_KEY not set' - ); - } - }); - - describe('Search and geocoding queries', () => { - test( - 'query "find coffee shops nearby" should match category_search_tool', - async () => { - const topTools = await findTopTools('find coffee shops nearby', 3); - const toolNames = topTools.map((t) => t.name); - - // category_search_tool should be in top 3 - expect(toolNames).toContain('category_search_tool'); - - // Log for debugging - console.log('Query: "find coffee shops nearby"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "where is Starbucks on 5th Avenue" should match search_and_geocode_tool', - async () => { - const topTools = await findTopTools( - 'where is Starbucks on 5th Avenue', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - // search_and_geocode_tool should be in top 3 - expect(toolNames).toContain('search_and_geocode_tool'); - - console.log('Query: "where is Starbucks on 5th Avenue"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "what is the address at these coordinates" should match reverse_geocode_tool', - async () => { - const topTools = await findTopTools( - 'what is the address at these coordinates', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - // reverse_geocode_tool should be in top 3 - expect(toolNames).toContain('reverse_geocode_tool'); - - console.log('Query: "what is the address at these coordinates"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "convert address to coordinates" should match search_and_geocode_tool', - async () => { - const topTools = await findTopTools( - 'convert this address to coordinates', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - // search_and_geocode_tool should be in top 3 (forward geocoding) - expect(toolNames).toContain('search_and_geocode_tool'); - - console.log('Query: "convert this address to coordinates"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - }); - - describe('Routing and navigation queries', () => { - test( - 'query "driving directions from A to B" should match directions_tool', - async () => { - const topTools = await findTopTools( - 'driving directions from LAX to Hollywood', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - expect(toolNames).toContain('directions_tool'); - - console.log('Query: "driving directions from LAX to Hollywood"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "travel time matrix between multiple locations" should match matrix_tool', - async () => { - const topTools = await findTopTools( - 'calculate travel times from warehouse to 10 addresses', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - expect(toolNames).toContain('matrix_tool'); - - console.log( - 'Query: "calculate travel times from warehouse to 10 addresses"' - ); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "areas reachable in 30 minutes" should match isochrone_tool', - async () => { - const topTools = await findTopTools( - 'show me areas I can reach in 30 minutes', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - expect(toolNames).toContain('isochrone_tool'); - - console.log('Query: "show me areas I can reach in 30 minutes"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - }); - - describe('Visualization queries', () => { - test( - 'query "generate map image" should match static_map_image_tool', - async () => { - const topTools = await findTopTools( - 'create a map image showing this location', - 3 - ); - const toolNames = topTools.map((t) => t.name); - - expect(toolNames).toContain('static_map_image_tool'); - - console.log('Query: "create a map image showing this location"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - }); - - describe('Category vs specific place disambiguation', () => { - test( - 'query "all restaurants" should prefer category_search over search_and_geocode', - async () => { - const topTools = await findTopTools( - 'show me all restaurants nearby', - 5 - ); - - // Find positions of both tools - const categoryIndex = topTools.findIndex( - (t) => t.name === 'category_search_tool' - ); - const searchIndex = topTools.findIndex( - (t) => t.name === 'search_and_geocode_tool' - ); - - // category_search should rank higher than search_and_geocode - expect(categoryIndex).toBeLessThan(searchIndex); - - console.log('Query: "show me all restaurants nearby"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - - test( - 'query "specific restaurant name" should prefer search_and_geocode over category_search', - async () => { - const topTools = await findTopTools('find McDonalds on Main Street', 5); - - const searchIndex = topTools.findIndex( - (t) => t.name === 'search_and_geocode_tool' - ); - const categoryIndex = topTools.findIndex( - (t) => t.name === 'category_search_tool' - ); - - // search_and_geocode should rank higher than category_search - expect(searchIndex).toBeLessThan(categoryIndex); - - console.log('Query: "find McDonalds on Main Street"'); - console.log('Top tools:', topTools); - }, - apiTimeout - ); - }); - - describe('Semantic similarity thresholds', () => { - test( - 'relevant tools should have similarity > 0.5', - async () => { - const queries = [ - { query: 'find directions', expectedTool: 'directions_tool' }, - { - query: 'search for gas stations', - expectedTool: 'category_search_tool' - }, - { - query: 'geocode an address', - expectedTool: 'search_and_geocode_tool' - } - ]; - - for (const { query, expectedTool } of queries) { - const topTools = await findTopTools(query, 5); - const tool = topTools.find((t) => t.name === expectedTool); - - expect(tool).toBeDefined(); - expect(tool!.score).toBeGreaterThan(0.5); - - console.log(`Query: "${query}" -> ${expectedTool}: ${tool!.score}`); - } - }, - apiTimeout * 3 - ); - }); -}); - -// Export warning if tests are skipped -if (!hasApiKey) { - console.warn( - '\n⚠️ Semantic tool selection tests skipped: Set OPENAI_API_KEY to run these tests\n' - ); -}