Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion backend/app/agent/factory/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

# TODO: Remove NoteTakingToolkit and use TerminalToolkit instead
from app.agent.toolkit.note_taking_toolkit import NoteTakingToolkit
from app.agent.toolkit.screenshot_toolkit import ScreenshotToolkit
from app.agent.toolkit.search_toolkit import SearchToolkit
from app.agent.toolkit.skill_toolkit import SkillToolkit
from app.agent.toolkit.terminal_toolkit import TerminalToolkit
Expand Down Expand Up @@ -97,6 +98,16 @@ def browser_agent(options: Chat):
working_directory=working_directory,
)
note_toolkit = message_integration.register_toolkits(note_toolkit)
screenshot_toolkit = ScreenshotToolkit(
options.project_id,
working_directory=working_directory,
agent_name=Agents.browser_agent,
)
# Save reference before registering for toolkits_to_register_agent
screenshot_toolkit_for_agent_registration = screenshot_toolkit
screenshot_toolkit = message_integration.register_toolkits(
screenshot_toolkit
)

skill_toolkit = SkillToolkit(
options.project_id,
Expand All @@ -119,6 +130,7 @@ def browser_agent(options: Chat):
*web_toolkit_custom.get_tools(),
*terminal_toolkit,
*note_toolkit.get_tools(),
*screenshot_toolkit.get_tools(),
*search_tools,
*skill_toolkit.get_tools(),
]
Expand All @@ -145,8 +157,12 @@ def browser_agent(options: Chat):
HumanToolkit.toolkit_name(),
NoteTakingToolkit.toolkit_name(),
TerminalToolkit.toolkit_name(),
ScreenshotToolkit.toolkit_name(),
SkillToolkit.toolkit_name(),
],
toolkits_to_register_agent=[web_toolkit_for_agent_registration],
toolkits_to_register_agent=[
web_toolkit_for_agent_registration,
screenshot_toolkit_for_agent_registration,
],
enable_snapshot_clean=True,
)
10 changes: 9 additions & 1 deletion backend/app/agent/factory/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ async def developer_agent(options: Chat):
web_deploy_toolkit
)
screenshot_toolkit = ScreenshotToolkit(
options.project_id, working_directory=working_directory
options.project_id,
working_directory=working_directory,
agent_name=Agents.developer_agent,
)
# Save reference before registering for toolkits_to_register_agent
screenshot_toolkit_for_agent_registration = screenshot_toolkit
screenshot_toolkit = message_integration.register_toolkits(
screenshot_toolkit
)
Expand Down Expand Up @@ -109,6 +113,10 @@ async def developer_agent(options: Chat):
TerminalToolkit.toolkit_name(),
NoteTakingToolkit.toolkit_name(),
WebDeployToolkit.toolkit_name(),
ScreenshotToolkit.toolkit_name(),
SkillToolkit.toolkit_name(),
],
toolkits_to_register_agent=[
screenshot_toolkit_for_agent_registration,
],
)
16 changes: 16 additions & 0 deletions backend/app/agent/factory/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
# TODO: Remove NoteTakingToolkit and use TerminalToolkit instead
from app.agent.toolkit.note_taking_toolkit import NoteTakingToolkit
from app.agent.toolkit.pptx_toolkit import PPTXToolkit
from app.agent.toolkit.screenshot_toolkit import ScreenshotToolkit
from app.agent.toolkit.skill_toolkit import SkillToolkit
from app.agent.toolkit.terminal_toolkit import TerminalToolkit
from app.agent.utils import NOW_STR
Expand Down Expand Up @@ -69,6 +70,16 @@ async def document_agent(options: Chat):
working_directory=working_directory,
)
note_toolkit = message_integration.register_toolkits(note_toolkit)
screenshot_toolkit = ScreenshotToolkit(
options.project_id,
working_directory=working_directory,
agent_name=Agents.document_agent,
)
# Save reference before registering for toolkits_to_register_agent
screenshot_toolkit_for_agent_registration = screenshot_toolkit
screenshot_toolkit = message_integration.register_toolkits(
screenshot_toolkit
)

terminal_toolkit = TerminalToolkit(
options.project_id,
Expand Down Expand Up @@ -101,6 +112,7 @@ async def document_agent(options: Chat):
*excel_toolkit.get_tools(),
*note_toolkit.get_tools(),
*terminal_toolkit.get_tools(),
*screenshot_toolkit.get_tools(),
*google_drive_tools,
*skill_toolkit.get_tools(),
]
Expand All @@ -127,7 +139,11 @@ async def document_agent(options: Chat):
ExcelToolkit.toolkit_name(),
NoteTakingToolkit.toolkit_name(),
TerminalToolkit.toolkit_name(),
ScreenshotToolkit.toolkit_name(),
GoogleDriveMCPToolkit.toolkit_name(),
SkillToolkit.toolkit_name(),
],
toolkits_to_register_agent=[
screenshot_toolkit_for_agent_registration,
],
)
3 changes: 3 additions & 0 deletions backend/app/agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@

<capabilities>
Your capabilities include:
- You can use ScreenshotToolkit to read image with given path.
- **Skills System**: You have access to a library of specialized skills that
provide expert guidance for specific tasks. When a skill is referenced with
double curly braces (e.g., {{pdf}} or {{data-analyzer}}), you should use
Expand Down Expand Up @@ -435,6 +436,7 @@

<capabilities>
Your capabilities are extensive and powerful:
- You can use ScreenshotToolkit to read image with given path.
- **Skills System**: You have access to a library of specialized skills that
provide expert guidance for specific tasks. When a skill is referenced with
double curly braces (e.g., {{pdf}} or {{data-analyzer}}), you should use
Expand Down Expand Up @@ -610,6 +612,7 @@

<capabilities>
Your capabilities include:
- You can use ScreenshotToolkit to read image with given path.
- **Skills System**: You have access to a library of specialized skills that
provide expert guidance for specific tasks. When a skill is referenced with
double curly braces (e.g., {{pdf}} or {{data-analyzer}}), you should use
Expand Down
5 changes: 3 additions & 2 deletions backend/app/agent/toolkit/screenshot_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,22 @@

from app.agent.toolkit.abstract_toolkit import AbstractToolkit
from app.component.environment import env
from app.service.task import Agents
from app.utils.listen.toolkit_listen import auto_listen_toolkit


@auto_listen_toolkit(BaseScreenshotToolkit)
class ScreenshotToolkit(BaseScreenshotToolkit, AbstractToolkit):
agent_name: str = Agents.developer_agent
agent_name: str

def __init__(
self,
api_task_id,
agent_name: str,
working_directory: str | None = None,
timeout: float | None = None,
):
self.api_task_id = api_task_id
self.agent_name = agent_name
if working_directory is None:
working_directory = env(
"file_save_path", os.path.expanduser("~/Downloads")
Expand Down
8 changes: 8 additions & 0 deletions backend/tests/app/agent/factory/test_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from app.agent.factory import browser_agent
from app.model.chat import Chat
from app.service.task import Agents

pytestmark = pytest.mark.unit

Expand All @@ -43,6 +44,7 @@ def test_browser_agent_creation(sample_chat_data):
patch(f"{_mod}.HybridBrowserToolkit") as mock_browser_toolkit,
patch(f"{_mod}.TerminalToolkit") as mock_terminal_toolkit,
patch(f"{_mod}.NoteTakingToolkit") as mock_note_toolkit,
patch(f"{_mod}.ScreenshotToolkit") as mock_screenshot_toolkit,
patch(f"{_mod}.SearchToolkit") as mock_search_toolkit,
patch(f"{_mod}.ToolkitMessageIntegration"),
patch("uuid.uuid4") as mock_uuid,
Expand All @@ -57,6 +59,7 @@ def test_browser_agent_creation(sample_chat_data):
mock_terminal_toolkit.return_value = mock_terminal_instance

mock_note_toolkit.return_value.get_tools.return_value = []
mock_screenshot_toolkit.return_value.get_tools.return_value = []
mock_search_instance = MagicMock()
mock_search_instance.search_google = MagicMock()
mock_search_toolkit.return_value = mock_search_instance
Expand All @@ -69,6 +72,11 @@ def test_browser_agent_creation(sample_chat_data):

assert result is mock_agent
mock_agent_model.assert_called_once()
mock_screenshot_toolkit.assert_called_once_with(
options.project_id,
working_directory="/tmp/test_workdir",
agent_name=Agents.browser_agent,
)

# Check that it was called with browser agent configuration
call_args = mock_agent_model.call_args
Expand Down
6 changes: 6 additions & 0 deletions backend/tests/app/agent/factory/test_developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from app.agent.factory import developer_agent
from app.model.chat import Chat
from app.service.task import Agents

pytestmark = pytest.mark.unit

Expand Down Expand Up @@ -61,6 +62,11 @@ async def test_developer_agent_creation(sample_chat_data):

assert result is mock_agent
mock_agent_model.assert_called_once()
mock_screenshot_toolkit.assert_called_once_with(
options.project_id,
working_directory="/tmp/test_workdir",
agent_name=Agents.developer_agent,
)

# Should have called with development-related tools
call_args = mock_agent_model.call_args
Expand Down
8 changes: 8 additions & 0 deletions backend/tests/app/agent/factory/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from app.agent.factory import document_agent
from app.model.chat import Chat
from app.service.task import Agents

pytestmark = pytest.mark.unit

Expand Down Expand Up @@ -46,6 +47,7 @@ async def test_document_agent_creation(sample_chat_data):
patch(f"{_mod}.MarkItDownToolkit") as mock_markdown_toolkit,
patch(f"{_mod}.ExcelToolkit") as mock_excel_toolkit,
patch(f"{_mod}.NoteTakingToolkit") as mock_note_toolkit,
patch(f"{_mod}.ScreenshotToolkit") as mock_screenshot_toolkit,
patch(f"{_mod}.TerminalToolkit") as mock_terminal_toolkit,
patch(f"{_mod}.GoogleDriveMCPToolkit") as mock_gdrive_toolkit,
patch(f"{_mod}.ToolkitMessageIntegration"),
Expand All @@ -57,6 +59,7 @@ async def test_document_agent_creation(sample_chat_data):
mock_markdown_toolkit.return_value.get_tools.return_value = []
mock_excel_toolkit.return_value.get_tools.return_value = []
mock_note_toolkit.return_value.get_tools.return_value = []
mock_screenshot_toolkit.return_value.get_tools.return_value = []
mock_terminal_toolkit.return_value.get_tools.return_value = []
mock_gdrive_toolkit.get_can_use_tools = AsyncMock(return_value=[])

Expand All @@ -67,6 +70,11 @@ async def test_document_agent_creation(sample_chat_data):

assert result is mock_agent
mock_agent_model.assert_called_once()
mock_screenshot_toolkit.assert_called_once_with(
options.project_id,
working_directory="/tmp/test_workdir",
agent_name=Agents.document_agent,
)

# Should have called with document-related tools
call_args = mock_agent_model.call_args
Expand Down
4 changes: 2 additions & 2 deletions src/components/Navigation/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ export function VerticalNavigation({
value={value}
defaultValue={initial}
onValueChange={onValueChange}
className={cn('flex-1 w-full', className)}
className={cn('w-full flex-1', className)}
>
<TabsList
className={cn(
'flex flex-col w-full gap-1.5 rounded-none border-none bg-transparent p-0',
'flex w-full flex-col gap-1.5 rounded-none border-none bg-transparent p-0',
listClassName
)}
>
Expand Down
2 changes: 1 addition & 1 deletion src/components/SearchInput/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ export default function SearchInput({
return (
<motion.div
className={cn(
'flex items-center justify-center py-0.5 overflow-hidden rounded-lg border border-solid border-transparent bg-transparent',
'flex items-center justify-center overflow-hidden rounded-lg border border-solid border-transparent bg-transparent py-0.5',
'focus-within:border-input-border-focus focus-within:bg-input-bg-input',
'hover:border-transparent hover:bg-surface-tertiary'
)}
Expand Down
6 changes: 3 additions & 3 deletions src/components/ui/alertDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export default function ConfirmModal({
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
className="alert-dialog fixed inset-0 z-[99] bg-black/20"
className="alert-dialog bg-black/20 fixed inset-0 z-[99]"
style={{ backgroundColor: 'rgba(0, 0, 0, 0.2)' }}
onClick={onClose}
/>
Expand All @@ -65,9 +65,9 @@ export default function ConfirmModal({
initial={{ opacity: 0, scale: 0.9, y: 20 }}
animate={{ opacity: 1, scale: 1, y: 0 }}
exit={{ opacity: 0, scale: 0.9, y: 20 }}
className="alert-dialog-wrapper fixed left-1/2 top-1/2 z-[100] max-w-md rounded-xl -translate-x-1/2 -translate-y-1/2"
className="alert-dialog-wrapper fixed left-1/2 top-1/2 z-[100] max-w-md -translate-x-1/2 -translate-y-1/2 rounded-xl"
>
<div className="p-6 rounded-xl border border-popup-border bg-surface-tertiary shadow-perfect">
<div className="rounded-xl border border-popup-border bg-surface-tertiary p-6 shadow-perfect">
<span className="mb-2 text-body-lg font-bold text-text-primary">
{title}
</span>
Expand Down
2 changes: 1 addition & 1 deletion src/components/ui/toggle-group.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const ToggleGroupItem = React.forwardRef<
variant: context.variant || variant,
size: context.size || size,
}),
'bg-surface-primary border-border-disabled data-[state=on]:bg-surface-tertiary data-[state=on]:border-border-secondary',
'border-border-disabled bg-surface-primary data-[state=on]:border-border-secondary data-[state=on]:bg-surface-tertiary',
className
)}
{...props}
Expand Down
28 changes: 14 additions & 14 deletions src/pages/Agents/components/SkillUploadDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,10 @@ export default function SkillUploadDialog({
>
<DialogHeader title={t('agents.add-skill')} />
<DialogContentSection>
<div className="gap-4 flex flex-col">
<div className="flex flex-col gap-4">
{/* Drop Zone */}
<div
className={`rounded-xl p-8 ease-in relative cursor-pointer border-2 border-dashed transition-colors duration-300 ${
className={`relative cursor-pointer rounded-xl border-2 border-dashed p-8 transition-colors duration-300 ease-in ${
uploadError
? 'border-border-cuation bg-surface-cuation'
: isDragging
Expand All @@ -444,10 +444,10 @@ export default function SkillUploadDialog({
/>

{selectedFile ? (
<div className="gap-6 flex flex-col items-center">
<div className="gap-2 flex items-center">
<div className="flex flex-col items-center gap-6">
<div className="flex items-center gap-2">
<div
className={`p-1 rounded-lg flex flex-shrink-0 items-center justify-center ${
className={`flex flex-shrink-0 items-center justify-center rounded-lg p-1 ${
uploadError
? 'bg-surface-cuation'
: 'bg-surface-tertiary'
Expand All @@ -461,9 +461,9 @@ export default function SkillUploadDialog({
}`}
/>
</div>
<div className="min-w-0 flex w-full flex-col">
<div className="flex w-full min-w-0 flex-col">
<span
className={`text-body-sm font-medium truncate ${
className={`truncate text-body-sm font-medium ${
uploadError
? 'text-text-cuation'
: 'text-text-heading'
Expand Down Expand Up @@ -495,11 +495,11 @@ export default function SkillUploadDialog({
</span>
</div>
) : (
<div className="gap-2 flex flex-col items-center">
<div className="h-12 w-12 flex items-center justify-center">
<div className="flex flex-col items-center gap-2">
<div className="flex h-12 w-12 items-center justify-center">
<Upload className="h-6 w-6 text-icon-secondary" />
</div>
<div className="gap-1 flex flex-col items-center text-center">
<div className="flex flex-col items-center gap-1 text-center">
<span className="text-body-sm font-medium text-text-heading">
{t('agents.drag-and-drop')}
</span>
Expand All @@ -514,10 +514,10 @@ export default function SkillUploadDialog({
{/* Error notice */}
{uploadError && errorMessage && (
<div
className="gap-4 rounded-xl border-border-cuation bg-surface-cuation px-4 py-3 flex items-center border"
className="flex items-center gap-4 rounded-xl border border-border-cuation bg-surface-cuation px-4 py-3"
role="alert"
>
<AlertCircle className="h-4 w-4 text-icon-cuation shrink-0" />
<AlertCircle className="h-4 w-4 shrink-0 text-icon-cuation" />
<span className="text-label-sm text-text-cuation">
{errorMessage}
</span>
Expand All @@ -529,11 +529,11 @@ export default function SkillUploadDialog({
<span className="text-label-sm font-bold text-text-body">
{t('agents.file-requirements')}
</span>
<span className="mt-2 gap-2 text-label-sm text-text-label flex items-start">
<span className="mt-2 flex items-start gap-2 text-label-sm text-text-label">
<span className="text-text-label">•</span>
<span>{t('agents.file-requirements-detail-1')}</span>
</span>
<span className="mt-1 gap-2 text-label-sm text-text-label flex items-start">
<span className="mt-1 flex items-start gap-2 text-label-sm text-text-label">
<span className="text-text-label">•</span>
<span>{t('agents.file-requirements-detail-2')}</span>
</span>
Expand Down