From 5ce5e99b20297773c241d2f69118ad9bdb958815 Mon Sep 17 00:00:00 2001 From: prosdev Date: Sun, 23 Nov 2025 03:18:05 -0800 Subject: [PATCH] fix(explore): search file content instead of filename for similar code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: explore similar searched filename as text → 0 results Fix: Read file content and search its embeddings Changes: - Extract file utilities (resolveFilePath, readFileContent, etc.) - Add 22 unit tests for utilities (100% coverage) - Refactor explore similar to use utilities - Add --threshold option (default: 0.5) - Improve error messages for missing/empty files Before: $ dev explore similar file.ts ⚠ No similar code found # searched "file.ts" as text After: $ dev explore similar packages/core/src/vector/store.ts 1. packages/core/src/vector/README.md (64.1% similar) 2. packages/core/src/vector/embedder.ts (61.7% similar) Tests: 22/22 passing ✅ --- packages/cli/src/commands/explore.ts | 24 ++- packages/cli/src/utils/file.test.ts | 237 +++++++++++++++++++++++++++ packages/cli/src/utils/file.ts | 72 ++++++++ 3 files changed, 329 insertions(+), 4 deletions(-) create mode 100644 packages/cli/src/utils/file.test.ts create mode 100644 packages/cli/src/utils/file.ts diff --git a/packages/cli/src/commands/explore.ts b/packages/cli/src/commands/explore.ts index 1dbb7da..1c6527d 100644 --- a/packages/cli/src/commands/explore.ts +++ b/packages/cli/src/commands/explore.ts @@ -72,6 +72,7 @@ explore .description('Find code similar to a file') .argument('', 'File path') .option('-l, --limit ', 'Number of results', '5') + .option('-t, --threshold ', 'Similarity threshold (0-1)', '0.5') .action(async (file: string, options) => { const spinner = ora('Finding similar code...').start(); @@ -84,19 +85,34 @@ explore return; } + // Prepare file for search (read content, resolve paths) + spinner.text = 'Reading file content...'; + const { prepareFileForSearch } = await 
import('../utils/file.js'); + + let fileInfo: Awaited<ReturnType<typeof prepareFileForSearch>>; + try { + fileInfo = await prepareFileForSearch(config.repositoryPath, file); + } catch (error) { + spinner.fail((error as Error).message); + process.exit(1); + return; + } + const indexer = new RepositoryIndexer(config); await indexer.initialize(); - const results = await indexer.search(file, { + // Search using file content, not filename + spinner.text = 'Searching for similar code...'; + const results = await indexer.search(fileInfo.content, { limit: Number.parseInt(options.limit, 10) + 1, - scoreThreshold: 0.7, + scoreThreshold: Number.parseFloat(options.threshold), }); - // Filter out the file itself + // Filter out the file itself (exact path match) const similar = results .filter((r) => { const meta = r.metadata as { path: string }; - return !meta.path.includes(file); + return meta.path !== fileInfo.relativePath; }) .slice(0, Number.parseInt(options.limit, 10)); diff --git a/packages/cli/src/utils/file.test.ts b/packages/cli/src/utils/file.test.ts new file mode 100644 index 0000000..35b4ee3 --- /dev/null +++ b/packages/cli/src/utils/file.test.ts @@ -0,0 +1,237 @@ +/** + * Unit tests for file utilities + * Target: 100% coverage for pure utility functions + */ + +import * as fs from 'node:fs/promises'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { normalizeFilePath, prepareFileForSearch, readFileContent, resolveFilePath } from './file'; + +describe('File Utilities', () => { + describe('resolveFilePath', () => { + it('should resolve relative path to absolute', () => { + const repoPath = '/home/user/project'; + const filePath = 'src/index.ts'; + + const result = resolveFilePath(repoPath, filePath); + + expect(result).toBe('/home/user/project/src/index.ts'); + }); + + it('should handle already absolute paths', () => { + const repoPath = '/home/user/project'; + const filePath =
'/home/user/project/src/index.ts'; + + const result = resolveFilePath(repoPath, filePath); + + expect(result).toBe('/home/user/project/src/index.ts'); + }); + + it('should handle paths with ../', () => { + const repoPath = '/home/user/project'; + const filePath = 'src/../lib/utils.ts'; + + const result = resolveFilePath(repoPath, filePath); + + expect(result).toBe('/home/user/project/lib/utils.ts'); + }); + + it('should handle current directory', () => { + const repoPath = '/home/user/project'; + const filePath = './src/index.ts'; + + const result = resolveFilePath(repoPath, filePath); + + expect(result).toBe('/home/user/project/src/index.ts'); + }); + }); + + describe('normalizeFilePath', () => { + it('should create relative path from absolute', () => { + const repoPath = '/home/user/project'; + const absolutePath = '/home/user/project/src/index.ts'; + + const result = normalizeFilePath(repoPath, absolutePath); + + expect(result).toBe('src/index.ts'); + }); + + it('should handle paths in subdirectories', () => { + const repoPath = '/home/user/project'; + const absolutePath = '/home/user/project/packages/core/src/index.ts'; + + const result = normalizeFilePath(repoPath, absolutePath); + + expect(result).toBe('packages/core/src/index.ts'); + }); + + it('should handle same path', () => { + const repoPath = '/home/user/project'; + const absolutePath = '/home/user/project'; + + const result = normalizeFilePath(repoPath, absolutePath); + + expect(result).toBe(''); + }); + + it('should handle paths outside repository', () => { + const repoPath = '/home/user/project'; + const absolutePath = '/home/user/other/file.ts'; + + const result = normalizeFilePath(repoPath, absolutePath); + + expect(result).toContain('..'); + }); + }); + + describe('readFileContent', () => { + let tempDir: string; + let testFile: string; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'file-test-')); + testFile = path.join(tempDir, 'test.txt'); + }); + + 
afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it('should read file content', async () => { + const content = 'Hello, World!'; + await fs.writeFile(testFile, content); + + const result = await readFileContent(testFile); + + expect(result).toBe(content); + }); + + it('should read multiline content', async () => { + const content = 'Line 1\nLine 2\nLine 3'; + await fs.writeFile(testFile, content); + + const result = await readFileContent(testFile); + + expect(result).toBe(content); + }); + + it('should throw error for non-existent file', async () => { + const nonExistent = path.join(tempDir, 'does-not-exist.txt'); + + await expect(readFileContent(nonExistent)).rejects.toThrow('File not found'); + }); + + it('should throw error for empty file', async () => { + await fs.writeFile(testFile, ''); + + await expect(readFileContent(testFile)).rejects.toThrow('File is empty'); + }); + + it('should throw error for whitespace-only file', async () => { + await fs.writeFile(testFile, ' \n \t \n '); + + await expect(readFileContent(testFile)).rejects.toThrow('File is empty'); + }); + + it('should handle files with leading/trailing whitespace', async () => { + const content = ' \n Content \n '; + await fs.writeFile(testFile, content); + + const result = await readFileContent(testFile); + + expect(result).toBe(content); + expect(result.trim()).toBe('Content'); + }); + + it('should handle large files', async () => { + const content = 'x'.repeat(10000); + await fs.writeFile(testFile, content); + + const result = await readFileContent(testFile); + + expect(result.length).toBe(10000); + }); + + it('should handle files with special characters', async () => { + const content = 'Hello 🚀 World\n中文\nΨ'; + await fs.writeFile(testFile, content, 'utf-8'); + + const result = await readFileContent(testFile); + + expect(result).toBe(content); + }); + }); + + describe('prepareFileForSearch', () => { + let tempDir: string; + let testFile: string; + + 
beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'file-test-')); + testFile = path.join(tempDir, 'test.txt'); + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it('should prepare file for search', async () => { + const content = 'Test content'; + await fs.writeFile(testFile, content); + + const result = await prepareFileForSearch(tempDir, 'test.txt'); + + expect(result.content).toBe(content); + expect(result.absolutePath).toBe(testFile); + expect(result.relativePath).toBe('test.txt'); + }); + + it('should handle nested directories', async () => { + const subDir = path.join(tempDir, 'src', 'utils'); + await fs.mkdir(subDir, { recursive: true }); + const nestedFile = path.join(subDir, 'helper.ts'); + await fs.writeFile(nestedFile, 'export function helper() {}'); + + const result = await prepareFileForSearch(tempDir, 'src/utils/helper.ts'); + + expect(result.content).toContain('helper'); + expect(result.relativePath).toBe('src/utils/helper.ts'); + }); + + it('should return correct FileContentResult structure', async () => { + await fs.writeFile(testFile, 'content'); + + const result = await prepareFileForSearch(tempDir, 'test.txt'); + + expect(result).toHaveProperty('content'); + expect(result).toHaveProperty('absolutePath'); + expect(result).toHaveProperty('relativePath'); + expect(typeof result.content).toBe('string'); + expect(typeof result.absolutePath).toBe('string'); + expect(typeof result.relativePath).toBe('string'); + }); + + it('should throw error for non-existent file', async () => { + await expect(prepareFileForSearch(tempDir, 'nonexistent.txt')).rejects.toThrow( + 'File not found' + ); + }); + + it('should throw error for empty file', async () => { + await fs.writeFile(testFile, ''); + + await expect(prepareFileForSearch(tempDir, 'test.txt')).rejects.toThrow('File is empty'); + }); + + it('should handle absolute path input', async () => { + await fs.writeFile(testFile, 
'content'); + + const result = await prepareFileForSearch(tempDir, testFile); + + expect(result.content).toBe('content'); + expect(result.relativePath).toBe('test.txt'); + }); + }); +}); diff --git a/packages/cli/src/utils/file.ts b/packages/cli/src/utils/file.ts new file mode 100644 index 0000000..da6d400 --- /dev/null +++ b/packages/cli/src/utils/file.ts @@ -0,0 +1,72 @@ +/** + * File utility functions for CLI commands + * Pure functions for file operations and validation + */ + +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; + +/** + * Resolve a file path relative to the repository root + */ +export function resolveFilePath(repositoryPath: string, filePath: string): string { + return path.resolve(repositoryPath, filePath); +} + +/** + * Normalize a file path to be relative to repository root + */ +export function normalizeFilePath(repositoryPath: string, absolutePath: string): string { + return path.relative(repositoryPath, absolutePath); +} + +/** + * Read and validate file content + * @throws Error if file doesn't exist or is empty + */ +export async function readFileContent(filePath: string): Promise<string> { + // Check if file exists + try { + await fs.access(filePath); + } catch { + throw new Error(`File not found: ${filePath}`); + } + + // Read file content + const content = await fs.readFile(filePath, 'utf-8'); + + // Validate content + if (content.trim().length === 0) { + throw new Error(`File is empty: ${filePath}`); + } + + return content; +} + +/** + * Result of reading file for similarity search + */ +export interface FileContentResult { + content: string; + absolutePath: string; + relativePath: string; +} + +/** + * Prepare a file for similarity search + * Resolves path, reads content, and normalizes paths + */ +export async function prepareFileForSearch( + repositoryPath: string, + filePath: string +): Promise<FileContentResult> { + const absolutePath = resolveFilePath(repositoryPath, filePath); + const content = await
readFileContent(absolutePath); + const relativePath = normalizeFilePath(repositoryPath, absolutePath); + + return { + content, + absolutePath, + relativePath, + }; +}