From 3ae3ee72c08e052d32fffd3dfc7a31577f366ed6 Mon Sep 17 00:00:00 2001 From: zver-in Date: Sun, 5 Oct 2025 16:36:14 +0300 Subject: [PATCH 1/2] refactor: translate Russian comments to English in filesystem tools - Translate comments in read_file function - Update search_in_files docstring - Add max_bytes configuration to assistant.yaml - Update README with current filesystem tools documentation --- README.md | 3 +- agents/assistant.yaml | 5 +++ src/tools/factory.py | 3 +- src/tools/filesystem.py | 94 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 96 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 169b56c..1723053 100644 --- a/README.md +++ b/README.md @@ -180,9 +180,10 @@ Note: The `--agent` must point to a YAML file that exists at the specified path. ### Filesystem Tools - `list_directory` - List contents of a directory -- `read_file` - Read file contents +- `read_file` - Read file contents with support for line ranges and size limits - `write_file` - Write to a file - `view_file` - View file contents with syntax highlighting +- `search_in_files` - Search for text within files under a given directory ### Git Tools diff --git a/agents/assistant.yaml b/agents/assistant.yaml index a8e848f..add5e64 100644 --- a/agents/assistant.yaml +++ b/agents/assistant.yaml @@ -8,6 +8,11 @@ tools: config: workdir: . - name: read_file + type: filesystem + config: + workdir: . + max_bytes: 200000 + - name: search_in_files type: filesystem config: workdir: . diff --git a/src/tools/factory.py b/src/tools/factory.py index 04405a4..8746f19 100644 --- a/src/tools/factory.py +++ b/src/tools/factory.py @@ -1,5 +1,5 @@ from typing import Any, Dict, Callable -from .filesystem import build_list_directory, build_read_file +from .filesystem import build_list_directory, build_read_file, build_search_in_files from .git import ( build_git_changed_files, build_git_diff, @@ -29,6 +29,7 @@ def __init__(self): "filesystem": { "list_directory": build_list_directory, "read_file": build_read_file, + "search_in_files": build_search_in_files, }, "git": { "git_changed_files": build_git_changed_files, diff --git a/src/tools/filesystem.py b/src/tools/filesystem.py index 17d529c..b51faac 100644 --- a/src/tools/filesystem.py +++ b/src/tools/filesystem.py @@ -106,12 +106,19 @@ def build_read_file(config: Dict[str, Any]): default_max = int(config.get("max_bytes", 200_000)) @tool("read_file", return_direct=False) - def read_file(path: str, max_bytes: Optional[int] = None) -> str: - """Read a file within the workdir with size limit and deny-pattern checks. + def read_file( + path: str, + max_bytes: Optional[int] = None, + start_line: Optional[int] = None, + end_line: Optional[int] = None, + ) -> str: + """Read a file within the workdir with size limit, deny-pattern checks, and optional line ranges. Parameters: - path: relative path to a file inside workdir - max_bytes: override read size limit (defaults to config) + - start_line: first line number to read (1-based, optional) + - end_line: last line number to read (inclusive, optional) Security policies: - leaving the workdir is forbidden @@ -128,21 +135,34 @@ def read_file(path: str, max_bytes: Optional[int] = None) -> str: rel = os.path.relpath(target, workdir) if _is_denied(rel, deny): return f"Access denied by deny policy for: {path}" + try: size = os.path.getsize(target) - if size > limit: + if size > limit and (start_line is None and end_line is None): return f"File is too large ({size} bytes), limit {limit} bytes: {path}" except PermissionError as e: return f"Permission denied when checking file size for {path}: {e}" except FileNotFoundError: return f"File not found while checking size: {path}" - except OSError as e: - # If stat fails for other OS reasons, continue to attempt reading with limit + except OSError: size = -1 + try: with open(target, "r", encoding="utf-8", errors="replace") as f: - content = f.read(limit) - return content + if start_line is not None or end_line is not None: + # Line-by-line reading + content_lines = [] + for i, line in enumerate(f, start=1): + if start_line is not None and i < start_line: + continue + if end_line is not None and i > end_line: + break + content_lines.append(line.rstrip("\n")) + return "\n".join(content_lines) + else: + # Old behavior: byte limit + return f.read(limit) + except UnicodeDecodeError: return f"Unable to decode file as UTF-8: {path}" except FileNotFoundError: @@ -155,3 +175,63 @@ def read_file(path: str, max_bytes: Optional[int] = None) -> str: return f"OS error while reading file {path}: {e}" return read_file + +def build_search_in_files(config: Dict[str, Any]): + """Create a parameterized search_in_files tool bound to config. + + Config supported keys: + - workdir: str (required) + - deny: list[str] (optional) + - max_bytes: int (optional, default 200_000) — maximum bytes to read from a single file + """ + workdir = _abspath(config.get("workdir", ".")) + deny: List[str] = list(config.get("deny", []) or []) + default_max = int(config.get("max_bytes", 200_000)) + + @tool("search_in_files", return_direct=False) + def search_in_files( + query: str, + path: str = ".", + file_glob: Optional[str] = None, + max_matches: int = 50, + ) -> str: + """Search for text within files under the given path inside the workdir. + + Parameters: + - query: text to search + - path: directory path relative to workdir (default '.') + - file_glob: optional file pattern (e.g. '*.py') + - max_matches: maximum number of matches to return + """ + target_dir = _abspath(os.path.join(workdir, path)) + if not _ensure_within(workdir, target_dir): + return f"Access denied: path is outside the working directory ({path})" + if not os.path.exists(target_dir): + return f"Directory not found: {path}" + if not os.path.isdir(target_dir): + return f"Not a directory: {path}" + + matches = [] + for root, _, files in os.walk(target_dir): + for filename in files: + rel = os.path.relpath(os.path.join(root, filename), workdir) + if _is_denied(rel, deny): + continue + if file_glob and not fnmatch.fnmatch(filename, file_glob): + continue + file_path = os.path.join(root, filename) + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + for lineno, line in enumerate(f, start=1): + if query in line: + matches.append(f"{rel}:{lineno}: {line.strip()}") + if len(matches) >= max_matches: + return "\n".join(matches) + except Exception: + continue + + if not matches: + return f"No matches for '{query}' in {path}" + return "\n".join(matches) + + return search_in_files From af9ad3c7040d1a033c838fe01927c1f1a8c5adae Mon Sep 17 00:00:00 2001 From: zver-in Date: Sun, 5 Oct 2025 16:48:42 +0300 Subject: [PATCH 2/2] Improve file handling and validation in filesystem.py - Make file size check logic more explicit when using line ranges - Add validation for start_line and end_line parameters - Clarify max_matches behavior in search_in_files docstring - Improve error messages for invalid line number inputs --- src/tools/filesystem.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/tools/filesystem.py b/src/tools/filesystem.py index b51faac..4afad3d 100644 --- a/src/tools/filesystem.py +++ b/src/tools/filesystem.py @@ -136,16 +136,26 @@ def read_file( if _is_denied(rel, deny): return f"Access denied by deny policy for: {path}" - try: - size = os.path.getsize(target) - if size > limit and (start_line is None and end_line is None): - return f"File is too large ({size} bytes), limit {limit} bytes: {path}" - except PermissionError as e: - return f"Permission denied when checking file size for {path}: {e}" - except FileNotFoundError: - return f"File not found while checking size: {path}" - except OSError: - size = -1 + # Only check file size if we're reading the whole file (no line range specified) + if start_line is None and end_line is None: + try: + size = os.path.getsize(target) + if size > limit: + return f"File is too large ({size} bytes), limit {limit} bytes: {path}" + except PermissionError as e: + return f"Permission denied when checking file size for {path}: {e}" + except FileNotFoundError: + return f"File not found while checking size: {path}" + except OSError: + size = -1 + + # Validate line numbers if provided + if start_line is not None and start_line < 1: + return f"Invalid start_line: must be 1 or greater, got {start_line}" + if end_line is not None and end_line < 1: + return f"Invalid end_line: must be 1 or greater, got {end_line}" + if start_line is not None and end_line is not None and start_line > end_line: + return f"Invalid line range: start_line ({start_line}) must be less than or equal to end_line ({end_line})" try: with open(target, "r", encoding="utf-8", errors="replace") as f: @@ -201,7 +211,7 @@ def search_in_files( - query: text to search - path: directory path relative to workdir (default '.') - file_glob: optional file pattern (e.g. '*.py') - - max_matches: maximum number of matches to return + - max_matches: maximum number of matches to return (first N matches found will be returned) """ target_dir = _abspath(os.path.join(workdir, path)) if not _ensure_within(workdir, target_dir):