From 3ce1546a334c3b0751d7ad3d696420019c2aa2c0 Mon Sep 17 00:00:00 2001 From: bitifirefly Date: Sun, 22 Mar 2026 08:59:10 +0000 Subject: [PATCH 1/4] fix(security): harden filesystem allowed_dir boundary checks --- opencane/agent/tools/filesystem.py | 7 +++++-- tests/test_filesystem_tools_workspace.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/opencane/agent/tools/filesystem.py b/opencane/agent/tools/filesystem.py index cfc8fd2457f..fa9d7bb7345 100644 --- a/opencane/agent/tools/filesystem.py +++ b/opencane/agent/tools/filesystem.py @@ -12,8 +12,11 @@ def _resolve_path(path: str, workspace: Path | None = None, allowed_dir: Path | if not target.is_absolute() and workspace is not None: target = workspace / target resolved = target.resolve() - if allowed_dir and not str(resolved).startswith(str(allowed_dir.resolve())): - raise PermissionError(f"Path {path} is outside allowed directory {allowed_dir}") + if allowed_dir: + try: + resolved.relative_to(allowed_dir.resolve()) + except ValueError: + raise PermissionError(f"Path {path} is outside allowed directory {allowed_dir}") from None return resolved diff --git a/tests/test_filesystem_tools_workspace.py b/tests/test_filesystem_tools_workspace.py index d838f561efd..cb0aae0ab8e 100644 --- a/tests/test_filesystem_tools_workspace.py +++ b/tests/test_filesystem_tools_workspace.py @@ -62,3 +62,19 @@ async def test_read_file_tool_still_enforces_allowed_dir_with_workspace(tmp_path result = await read_tool.execute(path="../outside.txt") assert result.startswith("Error: Path ../outside.txt is outside allowed directory") + + +@pytest.mark.asyncio +async def test_allowed_dir_check_rejects_startswith_path_bypass(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + workspace.mkdir(parents=True) + evil_dir = tmp_path / "workspace_evil" + evil_dir.mkdir(parents=True) + secret = evil_dir / "secret.txt" + secret.write_text("leak", encoding="utf-8") + + read_tool = ReadFileTool(workspace=workspace, allowed_dir=workspace) + result = await read_tool.execute(path=str(secret)) + + assert result.startswith("Error: Path ") + assert "outside allowed directory" in result From 28b224a908c024ededd6753488b0ea7650ce7aef Mon Sep 17 00:00:00 2001 From: bitifirefly Date: Sun, 22 Mar 2026 08:59:23 +0000 Subject: [PATCH 2/4] feat(exec): add path_append config and strengthen path guards --- docs/deployment-config.md | 1 + opencane/agent/loop.py | 1 + opencane/agent/subagent.py | 1 + opencane/agent/tools/shell.py | 27 ++++++++++++------ opencane/config/schema.py | 1 + tests/test_exec_security.py | 28 +++++++++++++++++++ tests/test_tool_domain_manager.py | 15 ++++++++++ tests/test_tool_validation.py | 46 +++++++++++++++++++++++++++++++ 8 files changed, 112 insertions(+), 8 deletions(-) diff --git a/docs/deployment-config.md b/docs/deployment-config.md index 71fdd58c8b6..4a4a16831cc 100644 --- a/docs/deployment-config.md +++ b/docs/deployment-config.md @@ -49,6 +49,7 @@ opencane config check --strict - `hardware.auth.enabled` - `hardware.auth.token` - `tools.exec.enable`(关闭后不注册 shell `exec` 工具) +- `tools.exec.pathAppend`(可选 PATH 追加目录,供 `exec` 子进程使用) - `tools.restrictToWorkspace`(限制工具只能访问工作区) - `safety.*` - `interaction.*` diff --git a/opencane/agent/loop.py b/opencane/agent/loop.py index f2b1c632db1..ad9c8dd277b 100644 --- a/opencane/agent/loop.py +++ b/opencane/agent/loop.py @@ -316,6 +316,7 @@ def _register_default_tools(self) -> None: working_dir=str(self.workspace), timeout=self.exec_config.timeout, restrict_to_workspace=self.restrict_to_workspace, + path_append=self.exec_config.path_append, )) self.tool_domains.register_tool( "exec", diff --git a/opencane/agent/subagent.py b/opencane/agent/subagent.py index 9fb7b983ff0..a7814b92e25 100644 --- a/opencane/agent/subagent.py +++ b/opencane/agent/subagent.py @@ -119,6 +119,7 @@ async def _run_subagent( working_dir=str(self.workspace), timeout=self.exec_config.timeout, restrict_to_workspace=self.restrict_to_workspace, + path_append=self.exec_config.path_append, )) tools.register(WebSearchTool(api_key=self.brave_api_key)) tools.register(WebFetchTool()) diff --git a/opencane/agent/tools/shell.py b/opencane/agent/tools/shell.py index 3db09cb8b34..3ba860de25c 100644 --- a/opencane/agent/tools/shell.py +++ b/opencane/agent/tools/shell.py @@ -19,6 +19,7 @@ def __init__( deny_patterns: list[str] | None = None, allow_patterns: list[str] | None = None, restrict_to_workspace: bool = False, + path_append: str = "", ): self.timeout = timeout self.working_dir = working_dir @@ -35,6 +36,7 @@ def __init__( ] self.allow_patterns = allow_patterns or [] self.restrict_to_workspace = restrict_to_workspace + self.path_append = path_append @property def name(self) -> str: @@ -67,12 +69,17 @@ async def execute(self, command: str, working_dir: str | None = None, **kwargs: if guard_error: return guard_error + env = os.environ.copy() + if self.path_append: + env["PATH"] = env.get("PATH", "") + os.pathsep + self.path_append + try: process = await asyncio.create_subprocess_shell( command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=cwd, + env=env, ) try: @@ -137,18 +144,22 @@ def _guard_command(self, command: str, cwd: str) -> str | None: cwd_path = Path(cwd).resolve() - win_paths = re.findall(r"[A-Za-z]:\\[^\\\"']+", cmd) - # Only match absolute paths — avoid false positives on relative - # paths like ".venv/bin/python" where "/bin/python" would be - # incorrectly extracted by the old pattern. - posix_paths = re.findall(r"(?:^|[\s|>])(/[^\s\"'>]+)", cmd) - - for raw in win_paths + posix_paths: + for raw in self._extract_absolute_paths(cmd): try: - p = Path(raw.strip()).resolve() + expanded = os.path.expandvars(raw.strip()) + p = Path(expanded).expanduser().resolve() except Exception: continue if p.is_absolute() and cwd_path not in p.parents and p != cwd_path: return "Error: Command blocked by safety guard (path outside working dir)" return None + + @staticmethod + def _extract_absolute_paths(command: str) -> list[str]: + # Match Windows absolute paths without truncating at backslashes. + win_paths = re.findall(r"[A-Za-z]:\\[^\s\"'|><;]+", command) + # Match POSIX absolute paths and ~/home shortcuts (including quoted). + posix_paths = re.findall(r"(?:^|[\s|>'\"])(/[^\s\"'>;|<]+)", command) + home_paths = re.findall(r"(?:^|[\s|>'\"])(~[^\s\"'>;|<]*)", command) + return win_paths + posix_paths + home_paths diff --git a/opencane/config/schema.py b/opencane/config/schema.py index 89d94174170..4faffbe0e9e 100644 --- a/opencane/config/schema.py +++ b/opencane/config/schema.py @@ -224,6 +224,7 @@ class ExecToolConfig(BaseModel): """Shell exec tool configuration.""" enable: bool = True timeout: int = 60 + path_append: str = "" class MCPServerConfig(BaseModel): diff --git a/tests/test_exec_security.py b/tests/test_exec_security.py index 177113713f3..4bc09e04c2c 100644 --- a/tests/test_exec_security.py +++ b/tests/test_exec_security.py @@ -5,6 +5,8 @@ import socket from unittest.mock import patch +import pytest + from opencane.agent.tools.shell import ExecTool @@ -52,3 +54,29 @@ def test_exec_guard_blocks_standalone_format_command() -> None: error = tool._guard_command("echo hi; format c:", "/tmp") assert error is not None assert "blocked by safety guard" in error + + +@pytest.mark.asyncio +async def test_exec_tool_appends_path_when_configured(monkeypatch: pytest.MonkeyPatch) -> None: + captured_env: dict[str, str] = {} + + class _FakeProcess: + returncode = 0 + + async def communicate(self): # type: ignore[no-untyped-def] + return b"ok", b"" + + async def _fake_create_subprocess_shell(*args, **kwargs): # type: ignore[no-untyped-def] + del args + env = kwargs.get("env") or {} + if isinstance(env, dict): + captured_env.update({k: str(v) for k, v in env.items() if isinstance(k, str)}) + return _FakeProcess() + + monkeypatch.setattr("opencane.agent.tools.shell.asyncio.create_subprocess_shell", _fake_create_subprocess_shell) + tool = ExecTool(path_append="/usr/sbin") + + result = await tool.execute("echo ok") + assert "ok" in result + assert "PATH" in captured_env + assert captured_env["PATH"].endswith("/usr/sbin") diff --git a/tests/test_tool_domain_manager.py b/tests/test_tool_domain_manager.py index ec9fb88fe2a..043d773d867 100644 --- a/tests/test_tool_domain_manager.py +++ b/tests/test_tool_domain_manager.py @@ -7,6 +7,7 @@ from opencane.agent.loop import AgentLoop from opencane.agent.tools.base import Tool +from opencane.agent.tools.shell import ExecTool from opencane.bus.events import InboundMessage from opencane.bus.queue import MessageBus from opencane.config.schema import ExecToolConfig @@ -293,3 +294,17 @@ def test_agent_loop_can_disable_exec_tool(tmp_path: Path) -> None: ) assert loop.tools.get("exec") is None + + +def test_agent_loop_passes_exec_path_append_to_tool(tmp_path: Path) -> None: + bus = MessageBus() + loop = AgentLoop( + bus=bus, + provider=_SystemRoleProbeProvider(), + workspace=tmp_path, + exec_config=ExecToolConfig(enable=True, path_append="/usr/local/sbin"), + ) + + tool = loop.tools.get("exec") + assert isinstance(tool, ExecTool) + assert tool.path_append == "/usr/local/sbin" diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py index 9c4896d77ba..338fcdb4477 100644 --- a/tests/test_tool_validation.py +++ b/tests/test_tool_validation.py @@ -2,6 +2,7 @@ from opencane.agent.tools.base import Tool from opencane.agent.tools.registry import ToolRegistry +from opencane.agent.tools.shell import ExecTool class SampleTool(Tool): @@ -88,6 +89,51 @@ async def test_registry_returns_validation_error() -> None: assert "Invalid parameters" in result +def test_exec_extract_absolute_paths_keeps_full_windows_path() -> None: + cmd = r"type C:\user\workspace\txt" + paths = ExecTool._extract_absolute_paths(cmd) + assert paths == [r"C:\user\workspace\txt"] + + +def test_exec_extract_absolute_paths_ignores_relative_posix_segments() -> None: + cmd = ".venv/bin/python script.py" + paths = ExecTool._extract_absolute_paths(cmd) + assert "/bin/python" not in paths + + +def test_exec_extract_absolute_paths_captures_posix_absolute_paths() -> None: + cmd = "cat /tmp/data.txt > /tmp/out.txt" + paths = ExecTool._extract_absolute_paths(cmd) + assert "/tmp/data.txt" in paths + assert "/tmp/out.txt" in paths + + +def test_exec_extract_absolute_paths_captures_home_paths() -> None: + cmd = "cat ~/.opencane/config.json > ~/out.txt" + paths = ExecTool._extract_absolute_paths(cmd) + assert "~/.opencane/config.json" in paths + assert "~/out.txt" in paths + + +def test_exec_extract_absolute_paths_captures_quoted_paths() -> None: + cmd = 'cat "/tmp/data.txt" "~/.opencane/config.json"' + paths = ExecTool._extract_absolute_paths(cmd) + assert "/tmp/data.txt" in paths + assert "~/.opencane/config.json" in paths + + +def test_exec_guard_blocks_home_path_outside_workspace(tmp_path) -> None: + tool = ExecTool(restrict_to_workspace=True) + error = tool._guard_command("cat ~/.opencane/config.json", str(tmp_path)) + assert error == "Error: Command blocked by safety guard (path outside working dir)" + + +def test_exec_guard_blocks_quoted_home_path_outside_workspace(tmp_path) -> None: + tool = ExecTool(restrict_to_workspace=True) + error = tool._guard_command('cat "~/.opencane/config.json"', str(tmp_path)) + assert error == "Error: Command blocked by safety guard (path outside working dir)" + + class NullableTool(Tool): @property def name(self) -> str: From 2e689bfd7384e35c04964a61cd06e9687f03ffca Mon Sep 17 00:00:00 2001 From: bitifirefly Date: Sun, 22 Mar 2026 08:59:33 +0000 Subject: [PATCH 3/4] fix(auth): tighten allowlist matching and preserve telegram legacy IDs --- opencane/channels/base.py | 10 +--- opencane/channels/telegram.py | 20 +++++++- tests/test_base_channel.py | 36 ++++++++++++++ tests/test_telegram_send_reply.py | 82 +++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 9 deletions(-) create mode 100644 tests/test_base_channel.py diff --git a/opencane/channels/base.py b/opencane/channels/base.py index 85fefabbdb0..7fabed4a31f 100644 --- a/opencane/channels/base.py +++ b/opencane/channels/base.py @@ -73,15 +73,9 @@ def is_allowed(self, sender_id: str) -> bool: # If no allow list, allow everyone if not allow_list: return True - - sender_str = str(sender_id) - if sender_str in allow_list: + if "*" in allow_list: return True - if "|" in sender_str: - for part in sender_str.split("|"): - if part and part in allow_list: - return True - return False + return str(sender_id) in allow_list async def _handle_message( self, diff --git a/opencane/channels/telegram.py b/opencane/channels/telegram.py index 357786ee027..1ffd236f5dc 100644 --- a/opencane/channels/telegram.py +++ b/opencane/channels/telegram.py @@ -118,6 +118,23 @@ def __init__( self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies self._typing_tasks: dict[str, asyncio.Task] = {} # chat_id -> typing loop task + def is_allowed(self, sender_id: str) -> bool: + """Preserve Telegram legacy allowlist support for sender_id|username.""" + if super().is_allowed(sender_id): + return True + + allow_list = getattr(self.config, "allow_from", []) + if not allow_list or "*" in allow_list: + return False + + sender_str = str(sender_id) + if sender_str.count("|") != 1: + return False + sender_num, username = sender_str.split("|", 1) + if not sender_num.isdigit() or not username: + return False + return sender_num in allow_list or username in allow_list + async def start(self) -> None: """Start the Telegram bot with long polling.""" if not self.config.token: @@ -438,7 +455,8 @@ async def _on_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) media_dir = get_data_path() / "media" media_dir.mkdir(parents=True, exist_ok=True) - file_path = media_dir / f"{media_file.file_id[:16]}{ext}" + unique_id = getattr(media_file, "file_unique_id", None) or media_file.file_id[:16] + file_path = media_dir / f"{unique_id}{ext}" await file.download_to_drive(str(file_path)) media_paths.append(str(file_path)) diff --git a/tests/test_base_channel.py b/tests/test_base_channel.py new file mode 100644 index 00000000000..de2a64039f2 --- /dev/null +++ b/tests/test_base_channel.py @@ -0,0 +1,36 @@ +from types import SimpleNamespace + +from opencane.bus.events import OutboundMessage +from opencane.bus.queue import MessageBus +from opencane.channels.base import BaseChannel + + +class _DummyChannel(BaseChannel): + name = "dummy" + + async def start(self) -> None: + return None + + async def stop(self) -> None: + return None + + async def send(self, msg: OutboundMessage) -> None: + del msg + return None + + +def test_is_allowed_requires_exact_match_without_token_splitting() -> None: + channel = _DummyChannel(SimpleNamespace(allow_from=["allow@email.com"]), MessageBus()) + + assert channel.is_allowed("allow@email.com") is True + assert channel.is_allowed("attacker|allow@email.com") is False + + +def test_is_allowed_wildcard_allows_all_senders() -> None: + channel = _DummyChannel(SimpleNamespace(allow_from=["*"]), MessageBus()) + assert channel.is_allowed("anyone") is True + + +def test_is_allowed_empty_allowlist_keeps_backward_compatible_open_access() -> None: + channel = _DummyChannel(SimpleNamespace(allow_from=[]), MessageBus()) + assert channel.is_allowed("someone") is True diff --git a/tests/test_telegram_send_reply.py b/tests/test_telegram_send_reply.py index b4c7d4d8451..362f65368ad 100644 --- a/tests/test_telegram_send_reply.py +++ b/tests/test_telegram_send_reply.py @@ -15,6 +15,7 @@ class _FakeBot: def __init__(self) -> None: self.calls: list[dict] = [] self.media_calls: list[dict] = [] + self.file = None async def send_message(self, **kwargs): # type: ignore[no-untyped-def] self.calls.append(kwargs) @@ -36,6 +37,9 @@ async def send_document(self, **kwargs): # type: ignore[no-untyped-def] self.media_calls.append({"kind": "document", **kwargs}) return None + async def get_file(self, _file_id): # type: ignore[no-untyped-def] + return self.file + class _FakeApp: def __init__(self, bot: _FakeBot) -> None: @@ -382,3 +386,81 @@ def test_telegram_get_extension_prefers_known_mime_type() -> None: ) assert channel._get_extension("file", "audio/ogg", "voice.mp3") == ".ogg" + + +def test_telegram_is_allowed_accepts_legacy_id_or_username_allowlist() -> None: + channel = TelegramChannel( + config=TelegramConfig(enabled=True, reply_to_message=False, allow_from=["12345", "alice", "67890|bob"]), + bus=MessageBus(), + ) + + assert channel.is_allowed("12345|carol") is True + assert channel.is_allowed("99999|alice") is True + assert channel.is_allowed("67890|bob") is True + + +def test_telegram_is_allowed_rejects_invalid_legacy_sender_shapes() -> None: + channel = TelegramChannel( + config=TelegramConfig(enabled=True, reply_to_message=False, allow_from=["alice"]), + bus=MessageBus(), + ) + + assert channel.is_allowed("attacker|alice|extra") is False + assert channel.is_allowed("not-a-number|alice") is False + + +@pytest.mark.asyncio +async def test_telegram_on_message_uses_file_unique_id_for_media_filename( + monkeypatch: pytest.MonkeyPatch, + tmp_path, +) -> None: + bot = _FakeBot() + channel = TelegramChannel( + config=TelegramConfig(enabled=True, reply_to_message=False, allow_from=["*"]), + bus=MessageBus(), + ) + channel._app = _FakeApp(bot) # type: ignore[assignment] + + downloaded: dict[str, str] = {} + + class _FakeDownloadedFile: + async def download_to_drive(self, path: str) -> None: + downloaded["path"] = path + + bot.file = _FakeDownloadedFile() + monkeypatch.setattr("opencane.channels.telegram.get_data_path", lambda: tmp_path) + + captured: dict = {} + + async def _capture_handle_message(**kwargs): # type: ignore[no-untyped-def] + captured.update(kwargs) + + monkeypatch.setattr(channel, "_handle_message", _capture_handle_message) + monkeypatch.setattr(channel, "_start_typing", lambda _chat_id: None) + + update = SimpleNamespace( + effective_user=SimpleNamespace(id=123, username="alice", first_name="Alice"), + message=SimpleNamespace( + message_id=1, + chat_id=456, + chat=SimpleNamespace(type="private"), + text=None, + caption=None, + photo=[ + SimpleNamespace( + file_id="file-id-that-should-not-be-used", + file_unique_id="stable-unique-id", + mime_type="image/jpeg", + file_name=None, + ) + ], + voice=None, + audio=None, + document=None, + ), + ) + + await channel._on_message(update, None) # type: ignore[arg-type] + + assert downloaded["path"].endswith("stable-unique-id.jpg") + assert captured["media"] == [str(tmp_path / "media" / "stable-unique-id.jpg")] From 5b656beb8ca6d7c2d4c1a14433683eb2c4f439f1 Mon Sep 17 00:00:00 2001 From: bitifirefly Date: Sun, 22 Mar 2026 08:59:43 +0000 Subject: [PATCH 4/4] fix(email): evict oldest processed UIDs instead of full reset --- opencane/channels/email.py | 22 ++++++++++++++++++---- tests/test_email_channel.py | 11 +++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/opencane/channels/email.py b/opencane/channels/email.py index 353e2b72e52..7c3a1d589f8 100644 --- a/opencane/channels/email.py +++ b/opencane/channels/email.py @@ -6,6 +6,7 @@ import re import smtplib import ssl +from collections import deque from datetime import date from email import policy from email.header import decode_header, make_header @@ -71,6 +72,7 @@ def __init__(self, config: EmailConfig, bus: MessageBus): self._last_subject_by_chat: dict[str, str] = {} self._last_message_id_by_chat: dict[str, str] = {} self._processed_uids: set[str] = set() # Capped to prevent unbounded growth + self._processed_uid_order: deque[str] = deque() self._MAX_PROCESSED_UIDS = 100000 async def start(self) -> None: @@ -356,10 +358,7 @@ def _fetch_messages_once( if uid: cycle_uids.add(uid) if dedupe and uid: - self._processed_uids.add(uid) - # mark_seen is the primary dedup; this set is a safety net - if len(self._processed_uids) > self._MAX_PROCESSED_UIDS: - self._processed_uids.clear() + self._remember_processed_uid(uid) if mark_seen: client.store(imap_id, "+FLAGS", "\\Seen") @@ -379,6 +378,21 @@ def _is_missing_mailbox_error(cls, exc: Exception) -> bool: message = str(exc).lower() return any(marker in message for marker in cls._IMAP_MISSING_MAILBOX_MARKERS) + def _remember_processed_uid(self, uid: str) -> None: + """Track a processed UID and evict oldest entries when over capacity.""" + if uid in self._processed_uids: + return + self._processed_uids.add(uid) + self._processed_uid_order.append(uid) + + if len(self._processed_uids) <= self._MAX_PROCESSED_UIDS: + return + + keep = max(self._MAX_PROCESSED_UIDS // 2, 1) + while len(self._processed_uids) > keep and self._processed_uid_order: + stale = self._processed_uid_order.popleft() + self._processed_uids.discard(stale) + @classmethod def _format_imap_date(cls, value: date) -> str: """Format date for IMAP search (always English month abbreviations).""" diff --git a/tests/test_email_channel.py b/tests/test_email_channel.py index 67d71264ea3..d9e9184cee7 100644 --- a/tests/test_email_channel.py +++ b/tests/test_email_channel.py @@ -40,6 +40,17 @@ def _make_raw_email( return msg.as_bytes() +def test_remember_processed_uid_evicts_oldest_half_when_capacity_exceeded() -> None: + channel = EmailChannel(_make_config(), MessageBus()) + channel._MAX_PROCESSED_UIDS = 4 + + for uid in ("u1", "u2", "u3", "u4", "u5"): + channel._remember_processed_uid(uid) + + assert channel._processed_uids == {"u4", "u5"} + assert list(channel._processed_uid_order) == ["u4", "u5"] + + def test_fetch_new_messages_parses_unseen_and_marks_seen(monkeypatch) -> None: raw = _make_raw_email(subject="Invoice", body="Please pay")