diff --git a/src/uipath/dev/__init__.py b/src/uipath/dev/__init__.py index 0110315..1c6076a 100644 --- a/src/uipath/dev/__init__.py +++ b/src/uipath/dev/__init__.py @@ -10,8 +10,16 @@ from textual import on from textual.app import App, ComposeResult from textual.binding import Binding -from textual.containers import Container, Horizontal -from textual.widgets import Button, Footer, Input, ListView, RichLog +from textual.containers import Container, Horizontal, ScrollableContainer +from textual.widgets import ( + Button, + Footer, + Input, + ListView, + RichLog, + TabbedContent, + TabPane, +) from uipath.core.tracing import UiPathTraceManager from uipath.runtime import UiPathRuntimeFactoryProtocol @@ -20,14 +28,30 @@ ) from uipath.dev.models import ( ChatMessage, + EvalRun, ExecutionMode, ExecutionRun, LogMessage, TraceMessage, ) from uipath.dev.models.chat import get_user_message, get_user_message_event -from uipath.dev.services import RunService -from uipath.dev.ui.panels import NewRunPanel, RunDetailsPanel, RunHistoryPanel +from uipath.dev.services import ( + EvalRunService, + EvalSetService, + EvaluatorService, + RunService, +) +from uipath.dev.ui.panels.evals import ( + AssignEvaluatorPanel, + EvalRunDetailsPanel, + EvalRunsListPanel, + EvalSetCreatePanel, + EvaluationEditPanel, + EvaluationsListPanel, +) +from uipath.dev.ui.panels.evaluators import EvaluatorFormPanel +from uipath.dev.ui.panels.runs import NewRunPanel, RunDetailsPanel +from uipath.dev.ui.panels.sidebar import SidebarPanel class UiPathDeveloperConsole(App[Any]): @@ -74,59 +98,320 @@ def __init__( on_chat=self._on_chat_for_ui, ) + # Evaluations services + self.evaluator_service = EvaluatorService() + self.eval_set_service = EvalSetService() + self.eval_run_service = EvalRunService( + trace_manager=self.trace_manager, + on_run_updated=self._on_eval_run_updated, + on_log=self._on_eval_log_for_ui, + on_trace=self._on_eval_trace_for_ui, + ) + # Just defaults for convenience self.initial_entrypoint: str = "main.py" self.initial_input: str = '{\n "message": "Hello World"\n}' + # Track currently displayed eval run for auto-refresh + self._current_eval_run_id: str | None = None + def compose(self) -> ComposeResult: """Compose the UI layout.""" with Horizontal(): - # Left sidebar - run history - with Container(classes="run-history"): - yield RunHistoryPanel(id="history-panel") + # Left sidebar - run history, eval sets and evaluators + with Container(classes="left-panel"): + yield SidebarPanel( + evaluator_service=self.evaluator_service, + eval_set_service=self.eval_set_service, + eval_run_service=self.eval_run_service, + on_run_selected=self._on_sidebar_run_selected, + on_new_run_clicked=self._on_new_run_clicked, + # Eval sets callbacks + on_evaluation_selected=self._on_evaluation_selected, + on_add_evaluation_clicked=self._on_add_evaluation_clicked, + on_assign_evaluator_clicked=self._on_assign_evaluator_clicked, + on_create_eval_set_clicked=self._on_create_eval_set_clicked, + on_eval_set_changed=self._on_eval_set_changed, + # Evaluators callbacks + on_evaluator_selected=self._on_evaluator_selected, + on_new_evaluator_clicked=self._on_new_evaluator_clicked, + id="left-panel", + ) # Main content area - with Container(classes="main-content"): - # New run panel (initially visible) - yield NewRunPanel( - id="new-run-panel", - classes="new-run-panel", - runtime_factory=self.runtime_factory, - ) + with Horizontal(id="main-content-split", classes="main-content"): + # Middle panel - contains different views based on context + with 
Container(id="middle-panel", classes="middle-panel"): + # New Run tabs - visible when Run History is selected + with TabbedContent(id="new-run-tabs"): + with TabPane("New Run", id="new-run-tab"): + yield NewRunPanel( + id="new-run-panel", + classes="new-run-panel-content", + runtime_factory=self.runtime_factory, + ) + + # Eval tabs - visible when Eval Sets is selected + with TabbedContent(id="eval-tabs", classes="hidden"): + # Evaluations tab + with TabPane("Evaluations", id="evaluations-tab"): + yield EvaluationsListPanel( + on_add_clicked=self._on_add_evaluation_clicked, + on_assign_clicked=self._on_assign_evaluator_clicked, + on_evaluation_selected=self._on_evaluation_selected, + id="evaluations-list-panel", + ) + + # Runs tab - shows eval runs for the selected eval set + with TabPane("Runs", id="runs-tab"): + yield EvalRunsListPanel( + eval_run_service=self.eval_run_service, + on_run_selected=self._on_eval_run_selected, + id="runs-list-panel", + ) + + # Create tabs - visible when creating new eval set + with TabbedContent(id="create-tabs", classes="hidden"): + with TabPane("Create", id="create-tab"): + yield EvalSetCreatePanel( + on_create=self._on_eval_set_create, + on_close=self._on_hide_create_panel, + id="eval-set-create-panel", + ) + + # Evaluator create tabs - visible when creating new evaluator + with TabbedContent(id="evaluator-create-tabs", classes="hidden"): + with TabPane("Create Evaluator", id="evaluator-create-tab"): + yield EvaluatorFormPanel( + on_save=self._on_evaluator_save, + on_delete=self._on_evaluator_delete, + on_close=self._on_evaluator_form_close, + id="evaluator-form-panel", + ) + + # Run details panel (initially hidden) - shown when viewing run details + yield RunDetailsPanel(id="details-panel", classes="hidden") + + # Right panel - edit panel (initially hidden) + with Container( + id="right-edit-panel", classes="right-edit-panel hidden" + ): + with TabbedContent(id="edit-tabs"): + with TabPane("Edit", id="edit-tab"): + # Pre-compose all edit panels, toggle visibility + yield EvaluationEditPanel( + evaluator_service=self.evaluator_service, + on_save=self._on_evaluation_save, + on_delete=self._on_evaluation_delete, + on_close=self._hide_right_edit_panel, + id="evaluation-edit-panel", + classes="hidden", + ) + yield AssignEvaluatorPanel( + evaluator_service=self.evaluator_service, + on_assign=self._on_evaluators_assign, + on_close=self._hide_right_edit_panel, + id="assign-evaluator-panel", + classes="hidden", + ) + # Container for evaluator create form (populated dynamically) + yield ScrollableContainer( + id="evaluator-create-content", + classes="hidden", + ) - # Run details panel (initially hidden) - yield RunDetailsPanel(id="details-panel", classes="hidden") + with TabbedContent(id="eval-run-tabs", classes="hidden"): + with TabPane("Details", id="eval-run-tab"): + yield EvalRunDetailsPanel( + id="eval-run-details-panel", + ) yield Footer() + def _get_button_handlers(self) -> dict[str, Any]: + """Get button ID to handler mapping for exact matches.""" + return { + "new-run-btn": self.action_new_run, + "execute-btn": lambda: self.action_execute_run(mode=ExecutionMode.RUN), + "debug-btn": lambda: self.action_execute_run(mode=ExecutionMode.DEBUG), + "chat-btn": lambda: self.action_execute_run(mode=ExecutionMode.CHAT), + "cancel-btn": self.action_cancel, + "debug-step-btn": self.action_debug_step, + "debug-continue-btn": self.action_debug_continue, + "debug-stop-btn": self.action_debug_stop, + "eval-run-btn": self.action_run_eval, + "close-evaluator-detail-btn": 
self._hide_right_edit_panel, + "create-evaluator-btn": self._on_create_evaluator_btn_clicked, + } + + def _get_prefix_button_handlers(self) -> list[tuple[str, Any]]: + """Get button prefix to handler mapping for startswith matches.""" + return [ + ("close-detail-btn", self._on_hide_eval_set_detail), + ("close-right-panel-btn", self._hide_right_edit_panel), + ] + async def on_button_pressed(self, event: Button.Pressed) -> None: """Handle button press events.""" - if event.button.id == "new-run-btn": - await self.action_new_run() - elif event.button.id == "execute-btn": - await self.action_execute_run(mode=ExecutionMode.RUN) - elif event.button.id == "debug-btn": - await self.action_execute_run(mode=ExecutionMode.DEBUG) - elif event.button.id == "chat-btn": - await self.action_execute_run(mode=ExecutionMode.CHAT) - elif event.button.id == "cancel-btn": - await self.action_cancel() - elif event.button.id == "debug-step-btn": - await self.action_debug_step() - elif event.button.id == "debug-continue-btn": - await self.action_debug_continue() - elif event.button.id == "debug-stop-btn": - await self.action_debug_stop() + btn_id = event.button.id or "" + + # Try exact match first + handlers = self._get_button_handlers() + if btn_id in handlers: + result = handlers[btn_id]() + if asyncio.iscoroutine(result): + await result + return + + # Try prefix matches + for prefix, handler in self._get_prefix_button_handlers(): + if btn_id.startswith(prefix): + result = handler() + if asyncio.iscoroutine(result): + await result + return async def on_list_view_selected(self, event: ListView.Selected) -> None: - """Handle run selection from history.""" + """Handle list selection from history and eval panels.""" if event.list_view.id == "run-list" and event.item: + # Run history selection run_id = getattr(event.item, "run_id", None) if run_id: - history_panel = self.query_one("#history-panel", RunHistoryPanel) + history_panel = self.query_one("#left-panel", SidebarPanel) run = history_panel.get_run_by_id(run_id) if run: self._show_run_details(run) + elif event.list_view.id == "evaluator-templates-list" and event.item: + # Evaluator template selection - show creation form in right panel + type_id = getattr(event.item, "type_id", None) + type_def = getattr(event.item, "type_def", None) + if type_id and type_def: + self._on_evaluator_type_selected(type_id, type_def) + + # ========================================================================= + # List Panel Callbacks + # ========================================================================= + + def _on_add_evaluation_clicked(self) -> None: + """Handle add evaluation button click - show add evaluation form.""" + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + if not eval_set_data: + self.notify("Please select an eval set first", severity="error") + return + + # Show evaluation edit panel in add mode (no evaluation data) + self._show_right_edit_panel(tab_name="Edit") + edit_panel = self.query_one("#evaluation-edit-panel", EvaluationEditPanel) + edit_panel.remove_class("hidden") + edit_panel.set_data( + evaluation=None, # Add mode + eval_set_data=eval_set_data, + eval_set_path=eval_sets_tab.get_selected_eval_set_path(), + ) + + def _on_evaluation_selected(self, eval_data: dict[str, Any]) -> None: + """Handle evaluation selection - show edit form.""" + history_panel = self.query_one("#left-panel", 
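A minimal, self-contained sketch of the dispatch pattern used by `on_button_pressed` above: try an exact-match handler table first, then prefix matches, and await the result only when the handler is async. It has no Textual dependency, and the handler names are illustrative rather than part of the real console.

```python
# Sketch of the handler-map dispatch pattern (illustrative names, no Textual).
import asyncio
from typing import Any, Callable


class Dispatcher:
    def __init__(self) -> None:
        # Exact button-id matches.
        self.exact: dict[str, Callable[[], Any]] = {
            "cancel-btn": self.on_cancel,    # sync handler
            "execute-btn": self.on_execute,  # async handler
        }
        # Prefix matches, checked only if no exact match was found.
        self.prefixes: list[tuple[str, Callable[[], Any]]] = [
            ("close-detail-btn", self.on_close_detail),
        ]

    def on_cancel(self) -> None:
        print("cancelled")

    async def on_execute(self) -> None:
        print("executing")

    def on_close_detail(self) -> None:
        print("detail closed")

    async def dispatch(self, btn_id: str) -> None:
        handler = self.exact.get(btn_id)
        if handler is None:
            handler = next(
                (h for prefix, h in self.prefixes if btn_id.startswith(prefix)),
                None,
            )
        if handler is None:
            return
        result = handler()
        if asyncio.iscoroutine(result):  # async handlers return a coroutine
            await result


asyncio.run(Dispatcher().dispatch("execute-btn"))
```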
SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + if not eval_set_data: + return + + # Show evaluation edit panel in edit mode + self._show_right_edit_panel(tab_name="Edit") + edit_panel = self.query_one("#evaluation-edit-panel", EvaluationEditPanel) + edit_panel.remove_class("hidden") + edit_panel.set_data( + evaluation=eval_data, + eval_set_data=eval_set_data, + eval_set_path=eval_sets_tab.get_selected_eval_set_path(), + ) + + def _on_assign_evaluator_clicked(self) -> None: + """Handle assign evaluator button click - show assign form.""" + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + if not eval_set_data: + self.notify("Please select an eval set first", severity="error") + return + + # Calculate unassigned evaluators + assigned_refs = set(eval_set_data.get("evaluatorRefs", [])) + all_evaluators = self.evaluator_service.list_evaluators() + unassigned = [ev for ev in all_evaluators if ev.get("id") not in assigned_refs] + + if not unassigned: + self.notify("All evaluators are already assigned", severity="warning") + return + + # Show assign evaluator panel + self._show_right_edit_panel(tab_name="Assign") + assign_panel = self.query_one("#assign-evaluator-panel", AssignEvaluatorPanel) + assign_panel.remove_class("hidden") + assign_panel.set_data( + unassigned=unassigned, + eval_set_data=eval_set_data, + eval_set_path=eval_sets_tab.get_selected_eval_set_path(), + ) + + def _on_eval_run_selected(self, eval_run: EvalRun) -> None: + """Handle eval run selection from EvalRunsListPanel.""" + asyncio.create_task(self._show_eval_run_detail(eval_run)) + + # ========================================================================= + # Evaluator Form Panel Callbacks + # ========================================================================= + + def _on_evaluator_form_close(self) -> None: + """Handle close from EvaluatorFormPanel.""" + self._hide_evaluator_create_panel() + + def _on_create_eval_set_clicked(self) -> None: + """Handle create eval set button click from sidebar.""" + self._show_create_panel() + + def _on_evaluator_selected(self, ev_data: dict[str, Any]) -> None: + """Handle evaluator selection from sidebar - show edit form.""" + evaluator_id = ev_data.get("id", "") + if not evaluator_id: + return + + self._set_panel_visibility( + { + "#new-run-tabs": False, + "#eval-tabs": False, + "#create-tabs": False, + "#evaluator-create-tabs": True, + } + ) + self._hide_right_edit_panel() + + try: + form_panel = self.query_one("#evaluator-form-panel", EvaluatorFormPanel) + asyncio.create_task(form_panel.show_edit_form(evaluator_id, ev_data)) + except Exception as e: + self.notify(f"Error showing evaluator edit: {e}", severity="error") + + def _on_new_evaluator_clicked(self) -> None: + """Handle new evaluator button click from sidebar - show templates.""" + self._on_show_evaluator_templates() + + def _on_hide_create_panel(self) -> None: + """Handle close button click from EvalSetCreatePanel.""" + self._hide_create_panel() + self._show_eval_sets_tabs() @on(Input.Submitted, "#chat-input") async def handle_chat_input(self, event: Input.Submitted) -> None: @@ -177,12 +462,260 @@ async def handle_chat_input(self, event: Input.Submitted) -> None: async def action_new_run(self) -> None: """Show new run panel.""" - new_panel = 
self.query_one("#new-run-panel") details_panel = self.query_one("#details-panel") - new_panel.remove_class("hidden") + self._show_new_run_panel() details_panel.add_class("hidden") + def _set_panel_visibility(self, visibility: dict[str, bool]) -> None: + """Set visibility for multiple panels at once. + + Args: + visibility: Dict mapping panel IDs to visibility (True=show, False=hide) + """ + try: + for panel_id, visible in visibility.items(): + panel = self.query_one(panel_id) + if visible: + panel.remove_class("hidden") + else: + panel.add_class("hidden") + except Exception: + pass + + def _show_new_run_panel(self) -> None: + """Show the New Run panel and hide Eval Sets tabs.""" + self._set_panel_visibility( + { + "#new-run-tabs": True, + "#eval-tabs": False, + "#create-tabs": False, + "#evaluator-create-tabs": False, + "#details-panel": False, + "#right-edit-panel": False, + } + ) + + def _show_eval_sets_tabs(self) -> None: + """Show the Evaluations/Evaluators tabs (for Eval Sets mode).""" + self._set_panel_visibility( + { + "#new-run-tabs": False, + "#eval-tabs": True, + "#create-tabs": False, + "#evaluator-create-tabs": False, + "#details-panel": False, + "#right-edit-panel": False, + } + ) + try: + eval_tabs = self.query_one("#eval-tabs", TabbedContent) + eval_tabs.active = "evaluations-tab" + except Exception: + pass + self._populate_eval_lists() + + def _on_eval_set_changed(self) -> None: + """Handle eval set selection change - refresh the middle panel lists.""" + self._populate_eval_lists() + + def _show_right_edit_panel(self, tab_name: str = "Edit") -> None: + """Show the right edit panel container and hide all sub-panels. + + Args: + tab_name: Name to display on the tab (default "Edit", use "Details" for run details) + """ + try: + right_edit_panel = self.query_one("#right-edit-panel") + edit_tabs = self.query_one("#edit-tabs", TabbedContent) + + right_edit_panel.remove_class("hidden") + edit_tabs.remove_class("hidden") + + # Hide all pre-composed sub-panels + self._hide_all_right_sub_panels() + + # Hide eval-run-tabs when showing edit panels + self.query_one("#eval-run-tabs", TabbedContent).add_class("hidden") + + # Update tab label + try: + tab = edit_tabs.get_tab("edit-tab") + tab.label = tab_name + except Exception: + pass + except Exception: + pass + + def _hide_all_right_sub_panels(self) -> None: + """Hide all pre-composed panels in the right edit panel.""" + try: + self.query_one("#evaluation-edit-panel", EvaluationEditPanel).add_class( + "hidden" + ) + except Exception: + pass + try: + self.query_one("#assign-evaluator-panel", AssignEvaluatorPanel).add_class( + "hidden" + ) + except Exception: + pass + try: + container = self.query_one( + "#evaluator-create-content", ScrollableContainer + ) + container.add_class("hidden") + # Clear the container contents + for child in list(container.children): + child.remove() + except Exception: + pass + + def _hide_right_edit_panel(self) -> None: + """Hide the right edit panel.""" + # Clear eval run tracking + self._current_eval_run_id = None + + try: + right_edit_panel = self.query_one("#right-edit-panel") + right_edit_panel.add_class("hidden") + self._hide_all_right_sub_panels() + + # Hide eval-run-tabs and restore edit-tabs visibility + self.query_one("#eval-run-tabs", TabbedContent).add_class("hidden") + edit_tabs = self.query_one("#edit-tabs", TabbedContent) + edit_tabs.remove_class("hidden") + except Exception: + pass + + def _show_create_panel(self) -> None: + """Show the create panel in the middle, hide the right panel.""" + 
try: + eval_tabs = self.query_one("#eval-tabs", TabbedContent) + create_tabs = self.query_one("#create-tabs", TabbedContent) + + # Hide eval tabs in middle, show create tabs + eval_tabs.add_class("hidden") + create_tabs.remove_class("hidden") + + self._hide_right_edit_panel() + + # Reset the pre-composed create panel + create_panel = self.query_one("#eval-set-create-panel", EvalSetCreatePanel) + create_panel.reset() + except Exception: + pass + + def _hide_create_panel(self) -> None: + """Hide the create panel and restore eval tabs in middle.""" + try: + eval_tabs = self.query_one("#eval-tabs", TabbedContent) + create_tabs = self.query_one("#create-tabs", TabbedContent) + + # Hide create tabs, show eval tabs in middle + create_tabs.add_class("hidden") + eval_tabs.remove_class("hidden") + + # Hide right panel + self._hide_right_edit_panel() + except Exception: + pass + + def _show_evaluator_create_panel(self) -> None: + """Show the evaluator creation panel (empty placeholder until type is selected).""" + self._set_panel_visibility( + { + "#new-run-tabs": False, + "#eval-tabs": False, + "#create-tabs": False, + "#evaluator-create-tabs": True, + "#details-panel": False, + "#right-edit-panel": False, + } + ) + try: + form_panel = self.query_one("#evaluator-form-panel", EvaluatorFormPanel) + form_panel.show_placeholder() + except Exception: + pass + + def _hide_evaluator_create_panel(self) -> None: + """Hide the evaluator creation panel.""" + try: + evaluator_create_tabs = self.query_one( + "#evaluator-create-tabs", TabbedContent + ) + evaluator_create_tabs.add_class("hidden") + except Exception: + pass + + def _on_show_evaluator_templates(self) -> None: + """Show templates list in middle panel.""" + self._set_panel_visibility( + { + "#new-run-tabs": False, + "#eval-tabs": False, + "#create-tabs": False, + "#evaluator-create-tabs": True, + } + ) + try: + form_panel = self.query_one("#evaluator-form-panel", EvaluatorFormPanel) + asyncio.create_task(form_panel.show_templates()) + except Exception: + pass + self._hide_right_edit_panel() + + def _on_evaluator_type_selected( + self, type_id: str, type_def: dict[str, Any] + ) -> None: + """Handle evaluator type/template selection - show creation form in right panel.""" + # Show the right panel and the evaluator create container + self._show_right_edit_panel(tab_name="Create") + content = self.query_one("#evaluator-create-content", ScrollableContainer) + content.remove_class("hidden") + + try: + form_panel = self.query_one("#evaluator-form-panel", EvaluatorFormPanel) + asyncio.create_task( + form_panel.populate_create_form_in_container(content, type_id, type_def) + ) + except Exception as e: + self.notify(f"Error showing create form: {e}", severity="error") + + def _populate_eval_lists(self) -> None: + """Populate the evaluations, runs, and evaluators lists via the panels.""" + try: + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_panel = history_panel.get_eval_sets_tab() + if not eval_sets_panel: + return + + # Update evaluations list panel with current data + eval_set_data = eval_sets_panel.current_eval_set_data + + evaluations_panel = self.query_one( + "#evaluations-list-panel", EvaluationsListPanel + ) + evaluations_panel.set_eval_set_data(eval_set_data) + + # Update runs list panel with selected eval set + runs_panel = self.query_one("#runs-list-panel", EvalRunsListPanel) + runs_panel.set_eval_set(eval_sets_panel.selected_eval_set) + except Exception: + pass + + async def _refresh_evaluators_list(self) -> None: + 
"""Refresh the evaluators list in the sidebar.""" + try: + history_panel = self.query_one("#left-panel", SidebarPanel) + evaluators_tab = history_panel.get_evaluators_tab() + if evaluators_tab: + await evaluators_tab.refresh_list() + except Exception: + pass + async def action_cancel(self) -> None: """Cancel and return to new run view.""" await self.action_new_run() @@ -202,7 +735,7 @@ async def action_execute_run(self, mode: ExecutionMode = ExecutionMode.RUN) -> N run = ExecutionRun(entrypoint, input_payload, mode=mode) - history_panel = self.query_one("#history-panel", RunHistoryPanel) + history_panel = self.query_one("#left-panel", SidebarPanel) history_panel.add_run(run) self.run_service.register_run(run) @@ -237,10 +770,88 @@ async def action_debug_stop(self) -> None: async def action_clear_history(self) -> None: """Clear run history.""" - history_panel = self.query_one("#history-panel", RunHistoryPanel) + history_panel = self.query_one("#left-panel", SidebarPanel) history_panel.clear_runs() await self.action_new_run() + def action_show_evaluations(self) -> None: + """Switch to the eval sets tab in the sidebar.""" + history_panel = self.query_one("#left-panel", SidebarPanel) + history_panel.switch_to_eval_sets() + + async def action_run_eval(self) -> None: + """Execute an evaluation run based on EvalSetsTab inputs. + + This is the eval equivalent of action_execute_run. + """ + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_panel = history_panel.get_eval_sets_tab() + if not eval_sets_panel: + return + + # Validate + if not eval_sets_panel.selected_eval_set: + self.notify("Please select an eval set", severity="error") + return + + eval_run = eval_sets_panel.create_eval_run() + + self.eval_run_service.register_run(eval_run) + + try: + runs_panel = self.query_one("#runs-list-panel", EvalRunsListPanel) + runs_panel.add_run(eval_run) + except Exception: + pass + + # Set the current eval run ID before switching tabs to prevent the tab change + # handler from hiding the right panel + self._current_eval_run_id = eval_run.id + + # Switch to Runs tab + try: + eval_tabs = self.query_one("#eval-tabs", TabbedContent) + eval_tabs.active = "runs-tab" + except Exception: + pass + + # Show the run details panel automatically + await self._show_eval_run_detail(eval_run) + + asyncio.create_task(self._execute_eval_run(eval_run)) + + def _on_eval_run_started(self, eval_run: EvalRun) -> None: + """Handle evaluation run started - register with service and update UI.""" + self.eval_run_service.register_run(eval_run) + + # Switch to Runs tab + try: + eval_tabs = self.query_one("#eval-tabs", TabbedContent) + eval_tabs.active = "runs-tab" + except Exception: + pass + + def _on_eval_run_updated(self, eval_run: EvalRun) -> None: + """Handle evaluation run updated - refresh the UI.""" + # Targeted update of the specific run item in the runs list + try: + runs_panel = self.query_one("#runs-list-panel", EvalRunsListPanel) + runs_panel.update_run(eval_run) + except Exception: + pass + + # If this run is currently displayed in the right panel, refresh it + if self._current_eval_run_id and self._current_eval_run_id == eval_run.id: + asyncio.create_task(self._show_eval_run_detail(eval_run)) + + def _on_sidebar_run_selected(self, run: ExecutionRun) -> None: + """Handle run selection from sidebar.""" + self._show_run_details(run) + + def _on_new_run_clicked(self) -> None: + """Handle new run button click from sidebar.""" + asyncio.create_task(self.action_new_run()) + def action_copy(self) -> None: 
"""Copy content of currently focused RichLog to clipboard and notify.""" focused = self.app.focused @@ -258,11 +869,19 @@ async def _execute_runtime(self, run: ExecutionRun) -> None: async def _resume_runtime(self, run: ExecutionRun, resume_data: Any) -> None: """Wrapper that delegates execution to RunService.""" await self.run_service.resume_debug(run, resume_data) + async def _execute_eval_run(self, eval_run: EvalRun) -> None: + """Wrapper that delegates eval execution to EvalService.""" + try: + await self.eval_run_service.execute(eval_run) + self.notify("Evaluations completed!", timeout=3) + except Exception as e: + error_str = str(e).replace("[", r"\[").replace("]", r"\]") + self.notify(f"Evaluation failed: {error_str}", severity="error", timeout=5) def _on_run_updated(self, run: ExecutionRun) -> None: """Called whenever a run changes (status, times, logs, traces).""" # Update the run in history - history_panel = self.query_one("#history-panel", RunHistoryPanel) + history_panel = self.query_one("#left-panel", SidebarPanel) history_panel.update_run(run) # If this run is currently shown, refresh details @@ -280,6 +899,22 @@ def _on_trace_for_ui(self, trace_msg: TraceMessage) -> None: details_panel = self.query_one("#details-panel", RunDetailsPanel) details_panel.add_trace(trace_msg) + def _on_eval_log_for_ui(self, log_msg: LogMessage) -> None: + """Append a log message to the eval run details UI.""" + try: + details_panel = self.query_one("#eval-run-details-panel", EvalRunDetailsPanel) + details_panel.add_log(log_msg) + except Exception: + pass + + def _on_eval_trace_for_ui(self, trace_msg: TraceMessage) -> None: + """Append/refresh traces in the eval run details UI.""" + try: + details_panel = self.query_one("#eval-run-details-panel", EvalRunDetailsPanel) + details_panel.add_trace(trace_msg) + except Exception: + pass + def _on_chat_for_ui( self, chat_msg: ChatMessage, @@ -290,13 +925,356 @@ def _on_chat_for_ui( def _show_run_details(self, run: ExecutionRun) -> None: """Show details panel for a specific run.""" - new_panel = self.query_one("#new-run-panel") + self._set_panel_visibility( + { + "#new-run-tabs": False, + "#eval-tabs": False, + "#right-edit-panel": False, + "#details-panel": True, + } + ) details_panel = self.query_one("#details-panel", RunDetailsPanel) + details_panel.update_run(run) - new_panel.add_class("hidden") - details_panel.remove_class("hidden") + async def _show_eval_run_detail(self, eval_run: EvalRun) -> None: + """Show eval run details in the right panel.""" + # Track which eval run is displayed for auto-refresh + self._current_eval_run_id = eval_run.id - details_panel.update_run(run) + # Show right panel container + right_edit_panel = self.query_one("#right-edit-panel") + right_edit_panel.remove_class("hidden") + + # Hide the edit-tabs TabbedContent and show eval-run-tabs + edit_tabs = self.query_one("#edit-tabs", TabbedContent) + edit_tabs.add_class("hidden") + + eval_run_tabs = self.query_one("#eval-run-tabs", TabbedContent) + eval_run_tabs.remove_class("hidden") + + details_panel = self.query_one("#eval-run-details-panel", EvalRunDetailsPanel) + details_panel.update_run(eval_run) + + # ========================================================================= + # Action-Specific Handlers + # ========================================================================= + + def _on_eval_set_create(self, form_data: dict[str, Any]) -> None: + """Handle eval set creation - App calls service. 
+ + Args: + form_data: Dict with eval_set_id, name, evaluator_refs, evaluations. + """ + eval_set_id = form_data.get("eval_set_id", "") + if not eval_set_id: + self.notify("Eval set ID is required", severity="error") + return + + # Build the eval set data structure + eval_set_data = { + "name": form_data.get("name", eval_set_id), + "evaluatorRefs": form_data.get("evaluator_refs", []), + "evaluations": form_data.get("evaluations", []), + } + + # Call service to persist + self.eval_set_service.save_eval_set(eval_set_id, eval_set_data) + + # Refresh UI + self._hide_create_panel() + self._show_eval_sets_tabs() + + # Refresh eval sets dropdown and select the new one + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if eval_sets_tab: + eval_sets_tab.refresh_eval_sets() + eval_sets_tab.select_eval_set(eval_set_id) + + self.notify(f"Eval set '{eval_set_id}' created!", timeout=3) + + def _on_create_evaluator_btn_clicked(self) -> None: + """Handle create evaluator button click from the external container.""" + form_panel = self.query_one("#evaluator-form-panel", EvaluatorFormPanel) + form_data = form_panel.get_create_form_data() + if form_data: + form_data["is_create"] = True + self._on_evaluator_save(form_data) + + def _on_evaluator_save(self, form_data: dict[str, Any]) -> None: + """Handle evaluator save. + + Args: + form_data: Dict with evaluator_id, type_id, description, config, + default_criteria, is_create. + """ + evaluator_id = form_data.get("evaluator_id", "") + if not evaluator_id: + self.notify("Evaluator ID is required", severity="error") + return + + is_create = form_data.get("is_create", False) + + # Build the evaluator data structure expected by the service + evaluator_data = { + "id": evaluator_id, + "description": form_data.get("description", ""), + "evaluatorTypeId": form_data.get("type_id", ""), + "config": form_data.get("config", {}), + "defaultCriteria": form_data.get("default_criteria", {}), + } + + # Call service to persist + self.evaluator_service.save_evaluator(evaluator_id, evaluator_data) + + # Refresh UI + self._hide_right_edit_panel() + asyncio.create_task(self._refresh_evaluators_list()) + + action = "created" if is_create else "saved" + self.notify(f"Evaluator '{evaluator_id}' {action}!", timeout=3) + + def _on_evaluator_delete(self, evaluator_id: str) -> None: + """Handle evaluator delete - App calls service. + + Args: + evaluator_id: The ID of the evaluator to delete. + """ + if not evaluator_id: + return + + # Call service to delete + self.evaluator_service.delete_evaluator(evaluator_id) + + # Refresh UI + self._hide_evaluator_create_panel() + asyncio.create_task(self._refresh_evaluators_list()) + + self.notify(f"Evaluator '{evaluator_id}' deleted!", timeout=3) + + def _on_evaluation_save(self, form_data: dict[str, Any]) -> None: + """Handle evaluation save - App calls service. + + Args: + form_data: Dict with id, name, inputs, evaluationCriteria, is_add_mode, eval_set_path. 
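For reference, a `form_data` payload of the shape `_on_evaluator_save` expects from `EvaluatorFormPanel`; the concrete values are invented, while the keys mirror the handler above and the `uipath-exact-match` type definition.

```python
# Illustrative only: input to _on_evaluator_save (values are made up).
form_data = {
    "evaluator_id": "exact-match-evaluator",
    "type_id": "uipath-exact-match",
    "description": "Strict comparison of the agent output",
    "config": {"targetOutputKey": "*", "ignoreCase": True},
    "default_criteria": {"expectedOutput": {"total": 118.40}},
    "is_create": True,
}
# The handler persists this as {"id", "description", "evaluatorTypeId",
# "config", "defaultCriteria"} via EvaluatorService.save_evaluator.
```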
+ """ + eval_set_path = form_data.get("eval_set_path", "") + if not eval_set_path: + self.notify("No eval set selected", severity="error") + return + + # Get the eval set name from the path + eval_set_name = Path(eval_set_path).stem + + # Get current eval set data + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + if not eval_set_data: + return + + # Build the evaluation entry + evaluation_entry = { + "id": form_data.get("id"), + "name": form_data.get("name"), + "inputs": form_data.get("inputs", {}), + "evaluationCriterias": form_data.get("evaluationCriterias", {}), + } + + # Update evaluations list + evaluations = eval_set_data.get("evaluations", []) + is_add_mode = form_data.get("is_add_mode", True) + + if is_add_mode: + evaluations.append(evaluation_entry) + else: + # Update existing evaluation + eval_id = form_data.get("id") + for i, ev in enumerate(evaluations): + if ev.get("id") == eval_id: + evaluations[i] = evaluation_entry + break + + eval_set_data["evaluations"] = evaluations + + # Call service to persist + self.eval_set_service.save_eval_set(eval_set_name, eval_set_data) + + # Update the tab's cached data + eval_sets_tab.set_current_eval_set_data(eval_set_data) + + # Refresh UI + self._hide_right_edit_panel() + + evaluations_panel = self.query_one( + "#evaluations-list-panel", EvaluationsListPanel + ) + evaluations_panel.set_eval_set_data(eval_set_data) + + action = "added" if is_add_mode else "saved" + self.notify(f"Evaluation {action}!", timeout=3) + + def _on_evaluation_delete(self, eval_id: str) -> None: + """Handle evaluation delete - App calls service. + + Args: + eval_id: The ID of the evaluation to delete. + """ + if not eval_id: + return + + # Get current eval set data + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + eval_set_path = eval_sets_tab.get_selected_eval_set_path() + if not eval_set_data or not eval_set_path: + return + + eval_set_name = Path(eval_set_path).stem + + # Remove the evaluation from the list + evaluations = eval_set_data.get("evaluations", []) + eval_set_data["evaluations"] = [ + ev for ev in evaluations if ev.get("id") != eval_id + ] + + # Call service to persist + self.eval_set_service.save_eval_set(eval_set_name, eval_set_data) + + # Update the tab's cached data + eval_sets_tab.set_current_eval_set_data(eval_set_data) + + # Refresh UI + self._hide_right_edit_panel() + + evaluations_panel = self.query_one( + "#evaluations-list-panel", EvaluationsListPanel + ) + evaluations_panel.set_eval_set_data(eval_set_data) + + self.notify("Evaluation deleted!", timeout=3) + + def _on_evaluators_assign(self, evaluator_ids: list[str]) -> None: + """Handle evaluators assign - App calls service. + + Args: + evaluator_ids: List of evaluator IDs to assign. 
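Illustrative only: the eval-set structure these handlers persist via `EvalSetService.save_eval_set`. The top-level keys (`name`, `evaluatorRefs`, `evaluations`, `evaluationCriterias`) come from the code above; the per-evaluator nesting of criteria is an assumption, as is every concrete value.

```python
# Assumed on-disk shape of an eval set (values and criteria nesting are guesses).
eval_set_data = {
    "name": "invoice-extraction",
    "evaluatorRefs": ["exact-match-evaluator", "llm-judge-output"],
    "evaluations": [
        {
            "id": "eval-001",
            "name": "Simple invoice",
            "inputs": {"message": "Extract the total from invoice #42"},
            "evaluationCriterias": {
                # assumed: criteria keyed by evaluator id
                "exact-match-evaluator": {"expectedOutput": {"total": 118.40}},
            },
        }
    ],
}
```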
+ """ + if not evaluator_ids: + self.notify("Please select at least one evaluator", severity="error") + return + + # Get current eval set data + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if not eval_sets_tab: + return + + eval_set_data = eval_sets_tab.get_current_eval_set_data() + eval_set_path = eval_sets_tab.get_selected_eval_set_path() + if not eval_set_data or not eval_set_path: + return + + eval_set_name = Path(eval_set_path).stem + + # Add new evaluator refs + existing_refs = set(eval_set_data.get("evaluatorRefs", [])) + existing_refs.update(evaluator_ids) + eval_set_data["evaluatorRefs"] = list(existing_refs) + + # Call service to persist + self.eval_set_service.save_eval_set(eval_set_name, eval_set_data) + + # Update the tab's cached data + eval_sets_tab.set_current_eval_set_data(eval_set_data) + + # Refresh UI + self._hide_right_edit_panel() + + evaluations_panel = self.query_one( + "#evaluations-list-panel", EvaluationsListPanel + ) + evaluations_panel.set_eval_set_data(eval_set_data) + + count = len(evaluator_ids) + self.notify(f"{count} evaluator(s) assigned!", timeout=3) + + def _refresh_evaluations_list(self) -> None: + """Refresh the evaluations list panel with updated data.""" + try: + updated_data = None + try: + edit_panel = self.query_one( + "#evaluation-edit-panel", EvaluationEditPanel + ) + updated_data = edit_panel.get_updated_eval_set_data() + except Exception: + pass + + if not updated_data: + try: + assign_panel = self.query_one( + "#assign-evaluator-panel", AssignEvaluatorPanel + ) + updated_data = assign_panel.get_updated_eval_set_data() + except Exception: + pass + + if updated_data: + # Update the eval_sets_tab state + history_panel = self.query_one("#left-panel", SidebarPanel) + eval_sets_tab = history_panel.get_eval_sets_tab() + if eval_sets_tab: + eval_sets_tab.set_current_eval_set_data(updated_data) + + # Refresh the evaluations list + evaluations_panel = self.query_one( + "#evaluations-list-panel", EvaluationsListPanel + ) + evaluations_panel.set_eval_set_data(updated_data) + except Exception: + pass + + def _on_hide_eval_set_detail(self) -> None: + """Hide the detail panel (either create or edit mode).""" + # Check if create panel is visible + try: + create_tabs = self.query_one("#create-tabs", TabbedContent) + if not create_tabs.has_class("hidden"): + self._hide_create_panel() + return + except Exception: + pass + + # Otherwise hide the right edit panel + self._hide_right_edit_panel() + + @on(TabbedContent.TabActivated, "#history-tabs") + def _on_sidebar_tab_changed(self, event: TabbedContent.TabActivated) -> None: + """Handle sidebar tab changes to update main content.""" + if event.tab.id == "run-history-tab--tab" or event.pane.id == "run-history-tab": + # Run History selected - show New Run panel + self._show_new_run_panel() + elif event.tab.id == "eval-sets-tab--tab" or event.pane.id == "eval-sets-tab": + # Eval Sets selected - show Evaluations/Evaluators tabs + self._show_eval_sets_tabs() + elif event.tab.id == "evaluators-tab--tab" or event.pane.id == "evaluators-tab": + # Evaluators selected - show evaluator creation panel (empty until type selected) + self._show_evaluator_create_panel() + + @on(TabbedContent.TabActivated, "#eval-tabs") + def _on_eval_tabs_changed(self, event: TabbedContent.TabActivated) -> None: + """Handle middle panel tab changes (Runs/Evaluations) to hide right panel.""" + if event.pane.id == "runs-tab" and self._current_eval_run_id: + return + 
self._hide_right_edit_panel() def _focus_chat_input(self) -> None: """Focus the chat input box.""" diff --git a/src/uipath/dev/models/__init__.py b/src/uipath/dev/models/__init__.py index fce14a7..db5377c 100644 --- a/src/uipath/dev/models/__init__.py +++ b/src/uipath/dev/models/__init__.py @@ -1,12 +1,22 @@ """UiPath Dev Console models module.""" +from uipath.dev.models.eval_run import EvalRun, EvaluationResult, EvaluatorResult +from uipath.dev.models.evaluator_types import ( + EVALUATOR_TYPES, + get_evaluator_type, +) from uipath.dev.models.execution import ExecutionMode, ExecutionRun from uipath.dev.models.messages import ChatMessage, LogMessage, TraceMessage __all__ = [ - "ExecutionRun", - "ExecutionMode", + "EVALUATOR_TYPES", "ChatMessage", + "EvalRun", + "EvaluationResult", + "EvaluatorResult", + "ExecutionMode", + "ExecutionRun", "LogMessage", "TraceMessage", + "get_evaluator_type", ] diff --git a/src/uipath/dev/models/eval_run.py b/src/uipath/dev/models/eval_run.py new file mode 100644 index 0000000..55eca28 --- /dev/null +++ b/src/uipath/dev/models/eval_run.py @@ -0,0 +1,247 @@ +"""Model for evaluation runs.""" + +import os +from dataclasses import dataclass, field +from datetime import datetime +from typing import TYPE_CHECKING, Any +from uuid import uuid4 + +from rich.text import Text +from uipath.runtime.errors import UiPathErrorContract + +if TYPE_CHECKING: + from uipath.dev.models.messages import LogMessage, TraceMessage + + +@dataclass +class EvaluatorResult: + """Result from a single evaluator for a single evaluation.""" + + evaluator_id: str + evaluator_name: str + score: float + details: str = "" + evaluation_time: float = 0.0 + justification: str = "" + + +@dataclass +class EvaluationResult: + """Result for a single evaluation.""" + + eval_id: str + eval_name: str + evaluator_results: list[EvaluatorResult] = field(default_factory=list) + + @property + def passed(self) -> bool: + """Check if all evaluators passed (score == 1.0).""" + return all(r.score == 1.0 for r in self.evaluator_results) + + +class EvalRun: + """A single evaluation run.""" + + def __init__( + self, + eval_set_path: str, + entrypoint: str, + *, + id: str | None = None, + name: str = "", + no_report: bool = False, + workers: int = 1, + eval_set_run_id: str | None = None, + enable_mocker_cache: bool = False, + eval_ids: list[str] | None = None, + report_coverage: bool = False, + output_file: str | None = None, + # For deserialization + status: str = "pending", + start_time: datetime | None = None, + end_time: datetime | None = None, + evaluator_refs: list[str] | None = None, + error: UiPathErrorContract | None = None, + ): + """Initialize an EvalRun instance.""" + self.id = id if id is not None else str(uuid4())[:8] + self.eval_set_path = eval_set_path + self.entrypoint = entrypoint + self.name = name if name else f"Run: {self.id}" + self.status = status # pending, running, completed, failed + self.start_time = start_time if start_time is not None else datetime.now() + self.end_time = end_time + self.evaluator_refs: list[str] = evaluator_refs if evaluator_refs is not None else [] + self.evaluation_results: list[EvaluationResult] = [] + self.error = error + self.logs: list["LogMessage"] = [] + self.traces: list["TraceMessage"] = [] + # Execution options + self.no_report = no_report + self.workers = workers + self.eval_set_run_id = eval_set_run_id + self.enable_mocker_cache = enable_mocker_cache + self.eval_ids: list[str] = eval_ids if eval_ids is not None else [] + self.report_coverage = report_coverage + 
self.output_file = output_file + + @property + def duration(self) -> str: + """Get the duration of the run as a formatted string.""" + if self.end_time: + delta = self.end_time - self.start_time + return f"{delta.total_seconds():.1f}s" + elif self.start_time: + delta = datetime.now() - self.start_time + return f"{delta.total_seconds():.1f}s" + return "0.0s" + + @property + def display_name(self) -> Text: + """Get formatted display name with status indicator.""" + status_colors = { + "pending": "grey50", + "running": "yellow", + "completed": "green", + "failed": "red", + } + + status_icon = { + "pending": "●", + "running": "▶", + "completed": "✔", + "failed": "✖", + }.get(self.status, "?") + + eval_set_name = ( + os.path.basename(self.eval_set_path).rsplit(".", 1)[0] + if self.eval_set_path + else "eval" + ) + truncated_name = eval_set_name[:8] + time_str = self.start_time.strftime("%H:%M:%S") + duration_str = self.duration[:6] + + text = Text() + text.append(f"{status_icon:<2} ", style=status_colors.get(self.status, "white")) + text.append(f"{truncated_name:<8} ") + text.append(f"({time_str:<8}) ") + text.append(f"[{duration_str:<6}]") + + return text + + @property + def total_evaluations(self) -> int: + """Get total number of evaluations.""" + return len(self.evaluation_results) + + @property + def evaluator_scores(self) -> dict[str, float]: + """Get average score per evaluator across all evaluations.""" + scores: dict[str, list[float]] = {} + for eval_result in self.evaluation_results: + for ev_result in eval_result.evaluator_results: + if ev_result.evaluator_id not in scores: + scores[ev_result.evaluator_id] = [] + scores[ev_result.evaluator_id].append(ev_result.score) + + return { + ev_id: sum(s) / len(s) if s else 0.0 + for ev_id, s in scores.items() + } + + @property + def overall_score(self) -> float: + """Get overall average score.""" + all_scores = [] + for eval_result in self.evaluation_results: + for ev_result in eval_result.evaluator_results: + all_scores.append(ev_result.score) + return sum(all_scores) / len(all_scores) if all_scores else 0.0 + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for serialization.""" + return { + "id": self.id, + "name": self.name, + "eval_set_path": self.eval_set_path, + "entrypoint": self.entrypoint, + "status": self.status, + "start_time": self.start_time.isoformat(), + "end_time": self.end_time.isoformat() if self.end_time else None, + "evaluator_refs": self.evaluator_refs, + "evaluation_results": [ + { + "eval_id": er.eval_id, + "eval_name": er.eval_name, + "evaluator_results": [ + { + "evaluator_id": evr.evaluator_id, + "evaluator_name": evr.evaluator_name, + "score": evr.score, + "details": evr.details, + "evaluation_time": evr.evaluation_time, + "justification": evr.justification, + } + for evr in er.evaluator_results + ], + } + for er in self.evaluation_results + ], + "error": self.error.to_dict() if self.error else None, + # Execution options + "no_report": self.no_report, + "workers": self.workers, + "eval_set_run_id": self.eval_set_run_id, + "enable_mocker_cache": self.enable_mocker_cache, + "eval_ids": self.eval_ids, + "report_coverage": self.report_coverage, + "output_file": self.output_file, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "EvalRun": + """Create from dictionary.""" + error_data = data.get("error") + error = UiPathErrorContract.from_dict(error_data) if error_data else None + + eval_run = cls( + id=data["id"], + name=data.get("name", ""), + eval_set_path=data["eval_set_path"], + 
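A small round-trip sketch for the `EvalRun` model, assuming the `uipath.dev.models` exports added in this diff are importable; the ids, names, and scores are sample data.

```python
# Round-trip an EvalRun through to_dict()/from_dict() (sample data only).
from uipath.dev.models import EvalRun, EvaluationResult, EvaluatorResult

run = EvalRun("evals/eval-sets/default.json", "main.py", name="Smoke run")
run.evaluation_results.append(
    EvaluationResult(
        eval_id="eval-001",
        eval_name="Simple invoice",
        evaluator_results=[
            EvaluatorResult(
                evaluator_id="exact-match-evaluator",
                evaluator_name="Exact Match Evaluator",
                score=1.0,
            )
        ],
    )
)
run.status = "completed"

payload = run.to_dict()                # JSON-serializable dict
restored = EvalRun.from_dict(payload)  # rebuilds results and execution options
assert restored.overall_score == 1.0
assert restored.evaluation_results[0].passed
```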
entrypoint=data["entrypoint"], + status=data.get("status", "pending"), + start_time=datetime.fromisoformat(data["start_time"]) if data.get("start_time") else None, + end_time=datetime.fromisoformat(data["end_time"]) if data.get("end_time") else None, + evaluator_refs=data.get("evaluator_refs", []), + error=error, + # Execution options + no_report=data.get("no_report", False), + workers=data.get("workers", 1), + eval_set_run_id=data.get("eval_set_run_id"), + enable_mocker_cache=data.get("enable_mocker_cache", False), + eval_ids=data.get("eval_ids", []), + report_coverage=data.get("report_coverage", False), + output_file=data.get("output_file"), + ) + + # Parse evaluation results + for er_data in data.get("evaluation_results", []): + eval_result = EvaluationResult( + eval_id=er_data["eval_id"], + eval_name=er_data.get("eval_name", er_data["eval_id"]), + ) + for evr_data in er_data.get("evaluator_results", []): + eval_result.evaluator_results.append( + EvaluatorResult( + evaluator_id=evr_data["evaluator_id"], + evaluator_name=evr_data.get("evaluator_name", evr_data["evaluator_id"]), + score=evr_data.get("score", 0.0), + details=evr_data.get("details", ""), + evaluation_time=evr_data.get("evaluation_time", 0.0), + justification=evr_data.get("justification", ""), + ) + ) + eval_run.evaluation_results.append(eval_result) + + return eval_run diff --git a/src/uipath/dev/models/evaluator_types.py b/src/uipath/dev/models/evaluator_types.py new file mode 100644 index 0000000..3073ca7 --- /dev/null +++ b/src/uipath/dev/models/evaluator_types.py @@ -0,0 +1,557 @@ +"""Evaluator type definitions and schemas.""" + +from typing import Any + + +EVALUATOR_TYPES: dict[str, dict[str, Any]] = { + "uipath-contains": { + "name": "Contains Evaluator", + "description": "Checks if the response text includes the expected search text.", + "category": "output", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ContainsEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "targetOutputKey", + "label": "Target Output Key", + "type": "string", + "default": "*", + "required": False, + "description": "Key to extract output from agent execution", + }, + { + "name": "ignoreCase", + "label": "Ignore Case", + "type": "boolean", + "default": False, + "required": False, + }, + { + "name": "negated", + "label": "Negated", + "type": "boolean", + "default": False, + "required": False, + "description": "If true, checks that text does NOT contain the search text", + }, + ], + "criteria_fields": [ + { + "name": "searchText", + "label": "Search Text", + "type": "string", + "required": True, + }, + ], + }, + "uipath-exact-match": { + "name": "Exact Match Evaluator", + "description": "Checks if the response text exactly matches the expected value.", + "category": "output", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ExactMatchEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "targetOutputKey", + "label": "Target Output Key", + "type": "string", + "default": "*", + "required": False, + "description": "Key to extract output from agent execution", + }, + { + "name": "ignoreCase", + "label": "Ignore Case", + "type": "boolean", + "default": False, + "required": False, + }, + { + "name": "negated", + "label": 
"Negated", + "type": "boolean", + "default": False, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "expectedOutput", + "label": "Expected Output", + "type": "json", + "required": True, + "description": "The expected output (string or object)", + }, + ], + }, + "uipath-json-similarity": { + "name": "JSON Similarity Evaluator", + "description": "Checks if the response JSON is similar to the expected JSON structure.", + "category": "output", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "JsonSimilarityEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "targetOutputKey", + "label": "Target Output Key", + "type": "string", + "default": "*", + "required": False, + "description": "Key to extract output from agent execution", + }, + ], + "criteria_fields": [ + { + "name": "expectedOutput", + "label": "Expected Output", + "type": "json", + "required": True, + "description": "The expected JSON output", + }, + ], + }, + "uipath-llm-judge-output-semantic-similarity": { + "name": "LLM Judge Output Evaluator", + "description": "Uses an LLM to judge semantic similarity between expected and actual output.", + "category": "output", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "LLMJudgeOutputEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "targetOutputKey", + "label": "Target Output Key", + "type": "string", + "default": "*", + "required": False, + }, + { + "name": "model", + "label": "Model", + "type": "string", + "default": "", + "required": False, + "description": "LLM model to use for judging", + }, + { + "name": "prompt", + "label": "Prompt", + "type": "textarea", + "default": "As an expert evaluator, analyze the semantic similarity of these JSON contents to determine a score from 0-100. Focus on comparing the meaning and contextual equivalence of corresponding fields, accounting for alternative valid expressions, synonyms, and reasonable variations in language while maintaining high standards for accuracy and completeness. 
Provide your score with a justification, explaining briefly and concisely why you gave that score.\n----\nExpectedOutput:\n{{ExpectedOutput}}\n----\nActualOutput:\n{{ActualOutput}}", + "required": False, + }, + { + "name": "temperature", + "label": "Temperature", + "type": "number", + "default": 0.0, + "required": False, + }, + { + "name": "maxTokens", + "label": "Max Tokens", + "type": "integer", + "default": None, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "expectedOutput", + "label": "Expected Output", + "type": "json", + "required": True, + }, + ], + }, + "uipath-llm-judge-output-strict-json-similarity": { + "name": "LLM Judge Strict JSON Similarity Evaluator", + "description": "Uses an LLM for strict JSON comparison between expected and actual output.", + "category": "output", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "LLMJudgeStrictJSONSimilarityOutputEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "targetOutputKey", + "label": "Target Output Key", + "type": "string", + "default": "*", + "required": False, + }, + { + "name": "model", + "label": "Model", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "prompt", + "label": "Prompt", + "type": "textarea", + "default": "Compare the following JSON outputs for strict structural similarity.\n\nActual Output: {{ActualOutput}}\nExpected Output: {{ExpectedOutput}}\n\nEvaluate if the JSON structure and values match precisely. Provide a score from 0-100 where 100 means exact match and 0 means completely different.", + "required": False, + }, + { + "name": "temperature", + "label": "Temperature", + "type": "number", + "default": 0.0, + "required": False, + }, + { + "name": "maxTokens", + "label": "Max Tokens", + "type": "integer", + "default": None, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "expectedOutput", + "label": "Expected Output", + "type": "json", + "required": True, + }, + ], + }, + "uipath-tool-call-order": { + "name": "Tool Call Order Evaluator", + "description": "Evaluates whether tools were called in the expected order.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ToolCallOrderEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "strict", + "label": "Strict", + "type": "boolean", + "default": False, + "required": False, + "description": "If true, requires exact order match; otherwise allows subsequence", + }, + ], + "criteria_fields": [ + { + "name": "toolCallsOrder", + "label": "Tool Calls Order", + "type": "string_array", + "required": True, + "description": "List of tool names in expected order", + }, + ], + }, + "uipath-tool-call-count": { + "name": "Tool Call Count Evaluator", + "description": "Evaluates the count of specific tool calls.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ToolCallCountEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "strict", + "label": "Strict", + "type": "boolean", + "default": False, + "required": False, + "description": "If true, requires exact count 
match", + }, + ], + "criteria_fields": [ + { + "name": "toolCallsCount", + "label": "Tool Calls Count", + "type": "json", + "required": True, + "description": "Object mapping tool names to [comparison, count] tuples", + }, + ], + }, + "uipath-tool-call-args": { + "name": "Tool Call Args Evaluator", + "description": "Evaluates the arguments passed to specific tool calls.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ToolCallArgsEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "strict", + "label": "Strict", + "type": "boolean", + "default": False, + "required": False, + }, + { + "name": "subset", + "label": "Subset", + "type": "boolean", + "default": False, + "required": False, + "description": "If true, only checks that expected args are present (not exact match)", + }, + ], + "criteria_fields": [ + { + "name": "toolCalls", + "label": "Tool Calls", + "type": "json", + "required": True, + "description": "Array of {name, args} objects specifying expected tool calls", + }, + ], + }, + "uipath-tool-call-output": { + "name": "Tool Call Output Evaluator", + "description": "Evaluates the outputs from specific tool calls.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "ToolCallOutputEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "strict", + "label": "Strict", + "type": "boolean", + "default": False, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "toolOutputs", + "label": "Tool Outputs", + "type": "json", + "required": True, + "description": "Array of {name, output} objects specifying expected tool outputs", + }, + ], + }, + "uipath-llm-judge-trajectory-similarity": { + "name": "LLM Judge Trajectory Evaluator", + "description": "Uses an LLM to evaluate the agent's execution trajectory.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "LLMJudgeTrajectoryEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "model", + "label": "Model", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "prompt", + "label": "Prompt", + "type": "textarea", + "default": "As an expert evaluator, determine how well the agent performed on a scale of 0-100. Focus on whether the agent's actions and outputs matched the expected behavior, while allowing for alternative valid expressions and reasonable variations in language. Maintain high standards for accuracy and completeness. 
Provide your score with a brief and clear justification explaining your reasoning.\n----\nAgentInput:\n{{UserOrSyntheticInput}}\n----\nExpectedAgentBehavior:\n{{ExpectedAgentBehavior}}\n----\nAgentRunHistory:\n{{AgentRunHistory}}\n", + "required": False, + }, + { + "name": "temperature", + "label": "Temperature", + "type": "number", + "default": 0.0, + "required": False, + }, + { + "name": "maxTokens", + "label": "Max Tokens", + "type": "integer", + "default": None, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "expectedAgentBehavior", + "label": "Expected Agent Behavior", + "type": "textarea", + "required": True, + "description": "Description of expected agent behavior", + }, + ], + }, + "uipath-llm-judge-trajectory-simulation": { + "name": "LLM Judge Trajectory Simulation Evaluator", + "description": "Uses an LLM to evaluate agent trajectory with simulation instructions.", + "category": "trajectory", + "config_fields": [ + { + "name": "name", + "label": "Name", + "type": "string", + "default": "LLMJudgeTrajectorySimulationEvaluator", + "required": False, + }, + { + "name": "description", + "label": "Description", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "model", + "label": "Model", + "type": "string", + "default": "", + "required": False, + }, + { + "name": "prompt", + "label": "Prompt", + "type": "textarea", + "default": "As an expert evaluator, determine how well the agent did on a scale of 0-100. Focus on if the simulation was successful and if the agent behaved according to the expected output accounting for alternative valid expressions, and reasonable variations in language while maintaining high standards for accuracy and completeness. Provide your score with a justification, explaining briefly and concisely why you gave that score.\n----\nAgentInput:\n{{UserOrSyntheticInput}}\n----\nSimulationInstructions:\n{{SimulationInstructions}}\n----\nExpectedAgentBehavior:\n{{ExpectedAgentBehavior}}\n----\nAgentRunHistory:\n{{AgentRunHistory}}\n", + "required": False, + }, + { + "name": "temperature", + "label": "Temperature", + "type": "number", + "default": 0.0, + "required": False, + }, + { + "name": "maxTokens", + "label": "Max Tokens", + "type": "integer", + "default": None, + "required": False, + }, + ], + "criteria_fields": [ + { + "name": "expectedAgentBehavior", + "label": "Expected Agent Behavior", + "type": "textarea", + "required": True, + "description": "Description of expected agent behavior", + }, + ], + }, +} + + +def get_evaluator_type(type_id: str) -> dict[str, Any] | None: + """Get evaluator type definition by ID.""" + return EVALUATOR_TYPES.get(type_id) diff --git a/src/uipath/dev/services/__init__.py b/src/uipath/dev/services/__init__.py index 8ed5680..6ec4d6d 100644 --- a/src/uipath/dev/services/__init__.py +++ b/src/uipath/dev/services/__init__.py @@ -1,7 +1,13 @@ """UiPath Developer Console services module.""" +from uipath.dev.services.eval_run_service import EvalRunService +from uipath.dev.services.eval_set_service import EvalSetService +from uipath.dev.services.evaluator_service import EvaluatorService from uipath.dev.services.run_service import RunService __all__ = [ + "EvalRunService", + "EvalSetService", + "EvaluatorService", "RunService", ] diff --git a/src/uipath/dev/services/eval_run_service.py b/src/uipath/dev/services/eval_run_service.py new file mode 100644 index 0000000..5659eae --- /dev/null +++ b/src/uipath/dev/services/eval_run_service.py @@ -0,0 +1,366 @@ +"""Service for executing eval 
runs.""" + +import json +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Callable + +from uipath._cli._evals._console_progress_reporter import ConsoleProgressReporter +from uipath._cli._evals._evaluate import evaluate +from uipath._cli._evals._progress_reporter import StudioWebProgressReporter +from uipath._cli._evals._runtime import UiPathEvalContext +from uipath._cli._utils._eval_set import EvalHelpers +from uipath._cli._utils._folders import get_personal_workspace_key_async +from uipath._cli._utils._studio_project import StudioClient +from uipath._config import UiPathConfig +from uipath._events._event_bus import EventBus +from uipath._utils._bindings import ResourceOverwritesContext +from uipath.core.tracing import UiPathTraceManager +from uipath.eval._helpers import auto_discover_entrypoint +from uipath.runtime import ( + UiPathRuntimeContext, + UiPathRuntimeFactoryProtocol, + UiPathRuntimeFactoryRegistry, +) +from uipath.runtime.errors import UiPathErrorContract +from uipath.tracing import LlmOpsHttpExporter + +from uipath.dev.infrastructure import RunContextExporter +from uipath.dev.models import LogMessage, TraceMessage +from uipath.dev.models.eval_run import EvalRun, EvaluationResult, EvaluatorResult + +EvalRunUpdatedCallback = Callable[[EvalRun], None] +EvalLogCallback = Callable[[LogMessage], None] +EvalTraceCallback = Callable[[TraceMessage], None] + + +class EvalRunService: + """Orchestrates eval runs.""" + + def __init__( + self, + trace_manager: UiPathTraceManager | None = None, + on_run_updated: EvalRunUpdatedCallback | None = None, + on_log: EvalLogCallback | None = None, + on_trace: EvalTraceCallback | None = None, + ): + """Initialize the eval run service. + + Args: + trace_manager: Trace manager for tracing (created if not provided). + on_run_updated: Callback when an eval run is updated. + on_log: Callback for log messages during evaluation. + on_trace: Callback for trace messages during evaluation. + """ + self.trace_manager = trace_manager or UiPathTraceManager() + self.runs: dict[str, EvalRun] = {} + + self.on_run_updated = on_run_updated + self.on_log = on_log + self.on_trace = on_trace + + self.trace_manager.add_span_exporter( + RunContextExporter( + on_trace=self.handle_trace, + on_log=self.handle_log, + ), + batch=False, + ) + + def register_run(self, run: EvalRun) -> None: + """Register a new run and emit an initial update.""" + self.runs[run.id] = run + self._emit_run_updated(run) + + def get_run(self, run_id: str) -> EvalRun | None: + """Get a registered run.""" + return self.runs.get(run_id) + + def get_runs_for_eval_set(self, eval_set_name: str) -> list[EvalRun]: + """Get all runs for an eval set. + + Args: + eval_set_name: Name of the eval set. + + Returns: + List of EvalRun objects for the given eval set (newest first). 
+ """ + matching_runs = [ + run + for run in self.runs.values() + if Path(run.eval_set_path).stem == eval_set_name + ] + # Sort by start_time descending (newest first) + matching_runs.sort(key=lambda r: r.start_time, reverse=True) + return matching_runs + + async def execute(self, run: EvalRun) -> None: + """Execute an eval run.""" + try: + self._add_info_log(run, "Starting evaluation run...") + self._add_info_log(run, f" Eval set path: {run.eval_set_path}") + self._add_info_log(run, f" Entrypoint: {run.entrypoint}") + self._add_info_log(run, f" Workers: {run.workers}") + + run.status = "running" + self._emit_run_updated(run) + + # Setup reporting prerequisites + should_register_progress_reporter = await self._setup_reporting_prereq(run) + + event_bus = EventBus() + + # Register progress reporters + if should_register_progress_reporter: + progress_reporter = StudioWebProgressReporter(LlmOpsHttpExporter()) + await progress_reporter.subscribe_to_eval_runtime_events(event_bus) + + # Create eval context + eval_context = UiPathEvalContext() + eval_context.entrypoint = run.entrypoint or auto_discover_entrypoint() + eval_context.no_report = run.no_report + eval_context.workers = run.workers + eval_context.eval_set_run_id = run.eval_set_run_id + eval_context.enable_mocker_cache = run.enable_mocker_cache + + # Resolve eval set path + eval_set_path = run.eval_set_path + _, resolved_eval_set_path = EvalHelpers.load_eval_set( + eval_set_path, run.eval_ids + ) + eval_context.eval_set = resolved_eval_set_path + eval_context.eval_ids = run.eval_ids + eval_context.report_coverage = run.report_coverage + + # Register console reporter + console_reporter = ConsoleProgressReporter() + await console_reporter.subscribe_to_eval_runtime_events(event_bus) + + self._add_info_log(run, f"Entrypoint: {eval_context.entrypoint}") + self._add_info_log(run, f"Eval set: {eval_set_path}") + + # Execute evaluation + results = await self._execute_eval( + eval_context, event_bus, run.output_file + ) + + # Parse results and update EvalRun + self._parse_eval_results(run, results, run.output_file) + + run.status = "completed" + self._add_info_log( + run, + f"Evaluation completed. 
Overall score: {run.overall_score * 100:.1f}%", + ) + + except SystemExit as e: + run.status = "failed" + run.end_time = datetime.now() + error_msg = f"Evaluation process exited with code: {e.code}" + run.error = UiPathErrorContract( + code="SystemExit", + title=error_msg, + detail="", + ) + self._add_error_log(run, f"SystemExit caught - {error_msg}") + self._emit_run_updated(run) + raise RuntimeError(error_msg) from e + + except BaseException as e: + run.status = "failed" + run.end_time = datetime.now() + import traceback + + exc_type = type(e).__name__ + exc_str = str(e) if str(e) else "(no message)" + error_msg = f"{exc_type}: {exc_str}" + run.error = UiPathErrorContract( + code=exc_type, + title=exc_str, + detail=traceback.format_exc(), + ) + self._add_error_log(run, f"Exception caught - type: {exc_type}, message: {exc_str}") + self._add_error_log(run, traceback.format_exc()) + self._emit_run_updated(run) + if isinstance(e, Exception): + raise + else: + raise RuntimeError(error_msg) from e + + self._emit_run_updated(run) + + def _emit_run_updated(self, run: EvalRun) -> None: + """Notify observers that a run's state changed.""" + self.runs[run.id] = run + if self.on_run_updated is not None: + self.on_run_updated(run) + + def handle_log(self, log_msg: LogMessage) -> None: + """Entry point for all logs.""" + run = self.runs.get(log_msg.run_id) + if run is not None: + run.logs.append(log_msg) + self._emit_run_updated(run) + + if self.on_log is not None: + self.on_log(log_msg) + + def handle_trace(self, trace_msg: TraceMessage) -> None: + """Entry point for traces (from RunContextExporter).""" + run = self.runs.get(trace_msg.run_id) + if run is not None: + # Update or append trace (upsert by span_id) + for i, existing_trace in enumerate(run.traces): + if existing_trace.span_id == trace_msg.span_id: + run.traces[i] = trace_msg + break + else: + run.traces.append(trace_msg) + + self._emit_run_updated(run) + + if self.on_trace is not None: + self.on_trace(trace_msg) + + def _add_info_log(self, run: EvalRun, message: str) -> None: + log_msg = LogMessage( + run_id=run.id, + level="INFO", + message=message, + timestamp=datetime.now(), + ) + self.handle_log(log_msg) + + def _add_error_log(self, run: EvalRun, message: str) -> None: + log_msg = LogMessage( + run_id=run.id, + level="ERROR", + message=message, + timestamp=datetime.now(), + ) + self.handle_log(log_msg) + + async def _setup_reporting_prereq(self, run: EvalRun) -> bool: + """Setup reporting prerequisites.""" + if run.no_report: + return False + + if not UiPathConfig.is_studio_project: + self._add_info_log( + run, + "UIPATH_PROJECT_ID not set. Results will not be reported to Studio Web.", + ) + return False + + if not UiPathConfig.folder_key: + folder_key = await get_personal_workspace_key_async() + if folder_key: + os.environ["UIPATH_FOLDER_KEY"] = folder_key + return True + + async def _execute_eval( + self, + eval_context: UiPathEvalContext, + event_bus: EventBus, + output_file: str | None = None, + ) -> Any: + """Execute the evaluation. + + Creates a new runtime factory inside the context and disposes it at the end. 
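+
+        Returns:
+            The result returned by evaluate(), as stored on ctx.result.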
+ """ + with UiPathRuntimeContext.with_defaults( + output_file=output_file, + trace_manager=self.trace_manager, + command="eval", + ) as ctx: + if ctx.job_id: + self.trace_manager.add_span_exporter(LlmOpsHttpExporter()) + + project_id = UiPathConfig.project_id + + # Create runtime factory inside context + runtime_factory = UiPathRuntimeFactoryRegistry.get(context=ctx) + + try: + if project_id: + studio_client = StudioClient(project_id) + + async with ResourceOverwritesContext( + lambda: studio_client.get_resource_overwrites() + ): + ctx.result = await evaluate( + runtime_factory, + self.trace_manager, + eval_context, + event_bus, + ) + else: + ctx.result = await evaluate( + runtime_factory, self.trace_manager, eval_context, event_bus + ) + finally: + # Dispose runtime factory + if runtime_factory: + await runtime_factory.dispose() + + return ctx.result + + def _parse_eval_results( + self, + run: EvalRun, + results: Any, + output_file: str | None = None, + ) -> None: + """Parse evaluation results and populate the EvalRun.""" + data = None + + # Try to get data from results + if results and hasattr(results, "output") and results.output: + try: + if isinstance(results.output, str): + data = json.loads(results.output) + elif isinstance(results.output, dict): + data = results.output + except Exception: + pass + + # Try to read from output file if no data yet + if not data and output_file: + try: + output_path = Path(output_file) + if output_path.exists(): + with open(output_path, "r") as f: + data = json.load(f) + except Exception: + pass + + if not data: + return + + # Parse the evaluation set results + eval_set_results = data.get("evaluationSetResults", []) + + for eval_result_data in eval_set_results: + eval_name = eval_result_data.get("evaluationName", "Unknown") + eval_id = eval_result_data.get("evaluationId", eval_name) + + eval_result = EvaluationResult(eval_id=eval_id, eval_name=eval_name) + + eval_run_results = eval_result_data.get("evaluationRunResults", []) + for run_result in eval_run_results: + evaluator_name = run_result.get("evaluatorName", "Unknown") + result_data = run_result.get("result", {}) + + eval_result.evaluator_results.append( + EvaluatorResult( + evaluator_id=evaluator_name, + evaluator_name=evaluator_name, + score=result_data.get("score", 0.0), + details=str(result_data.get("details", "")), + evaluation_time=result_data.get("evaluationTime", 0.0), + justification=result_data.get("justification", ""), + ) + ) + + run.evaluation_results.append(eval_result) diff --git a/src/uipath/dev/services/eval_set_service.py b/src/uipath/dev/services/eval_set_service.py new file mode 100644 index 0000000..632824f --- /dev/null +++ b/src/uipath/dev/services/eval_set_service.py @@ -0,0 +1,148 @@ +"""Service for managing eval set definitions.""" + +import json +import os +from pathlib import Path +from typing import Any + + +class EvalSetService: + """Service for eval set CRUD operations.""" + + def __init__( + self, + base_dir: str | Path | None = None, + eval_sets_subdir: str = "evaluations/eval-sets", + ): + """Initialize the eval set service. + + Args: + base_dir: Base directory for evaluations. Defaults to current working directory. + eval_sets_subdir: Subdirectory for eval set JSON files. 
+ """ + self.base_dir = Path(base_dir) if base_dir else Path(os.getcwd()) + self.eval_sets_dir = self.base_dir / eval_sets_subdir + + def ensure_directory(self) -> None: + """Ensure the eval-sets directory exists.""" + self.eval_sets_dir.mkdir(parents=True, exist_ok=True) + + def list_eval_sets(self) -> list[dict[str, Any]]: + """List all eval sets from evaluations/eval-sets/. + + Returns: + List of eval set data dictionaries. + """ + eval_sets = [] + if not self.eval_sets_dir.exists(): + return eval_sets + + for json_file in self.eval_sets_dir.glob("*.json"): + try: + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + eval_sets.append({ + "id": data.get("id", json_file.stem), + "name": data.get("name", json_file.stem), + "evaluatorRefs": data.get("evaluatorRefs", []), + "evaluation_count": len(data.get("evaluations", [])), + "file_path": str(json_file), + "data": data, + }) + except (json.JSONDecodeError, IOError): + continue + + return eval_sets + + def load_eval_set(self, eval_set_id: str) -> dict[str, Any] | None: + """Load an eval set by ID. + + Args: + eval_set_id: The eval set ID to load. + + Returns: + The eval set data dictionary, or None if not found. + """ + if not self.eval_sets_dir.exists(): + return None + + # Try direct file match first + json_file = self.eval_sets_dir / f"{eval_set_id}.json" + if json_file.exists(): + try: + with open(json_file, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + return None + + # Search by ID field + for json_file in self.eval_sets_dir.glob("*.json"): + try: + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + if data.get("id") == eval_set_id: + return data + except (json.JSONDecodeError, IOError): + continue + + return None + + def create_eval_set( + self, + eval_set_id: str, + name: str, + evaluator_refs: list[str], + evaluations: list[dict[str, Any]], + ) -> str: + """Create a new eval set JSON file. + + Args: + eval_set_id: Unique ID for the eval set. + name: Display name for the eval set. + evaluator_refs: List of evaluator IDs to include. + evaluations: List of evaluation/test case definitions. + + Returns: + The path to the created file. + + Raises: + ValueError: If ID already exists. + """ + self.ensure_directory() + + # Check if ID already exists + existing_file = self.eval_sets_dir / f"{eval_set_id}.json" + if existing_file.exists(): + raise ValueError(f"Eval set with ID '{eval_set_id}' already exists") + + # Build the eval set JSON structure + eval_set_data = { + "version": "1.0", + "id": eval_set_id, + "name": name, + "evaluatorRefs": evaluator_refs, + "evaluations": evaluations, + } + + # Write to file + file_path = self.eval_sets_dir / f"{eval_set_id}.json" + with open(file_path, "w", encoding="utf-8") as f: + json.dump(eval_set_data, f, indent=2) + + return str(file_path) + + def save_eval_set(self, eval_set_id: str, data: dict[str, Any]) -> str: + """Save an eval set directly with the given data. + + Args: + eval_set_id: The eval set ID. + data: The full eval set data to save. + + Returns: + The path to the saved file. 
+ """ + self.ensure_directory() + file_path = self.eval_sets_dir / f"{eval_set_id}.json" + with open(file_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + return str(file_path) diff --git a/src/uipath/dev/services/evaluator_service.py b/src/uipath/dev/services/evaluator_service.py new file mode 100644 index 0000000..a259ac4 --- /dev/null +++ b/src/uipath/dev/services/evaluator_service.py @@ -0,0 +1,212 @@ +"""Service for managing evaluator instances.""" + +import json +import os +from pathlib import Path +from typing import Any + +from uipath.dev.models.evaluator_types import get_evaluator_type + + +class EvaluatorService: + """Service for evaluator CRUD operations.""" + + def __init__( + self, + base_dir: str | Path | None = None, + evaluators_subdir: str = "evaluations/evaluators", + ): + """Initialize the evaluator service. + + Args: + base_dir: Base directory for evaluations. Defaults to current working directory. + evaluators_subdir: Subdirectory for evaluator JSON files. + """ + self.base_dir = Path(base_dir) if base_dir else Path(os.getcwd()) + self.evaluators_dir = self.base_dir / evaluators_subdir + + def ensure_directory(self) -> None: + """Ensure the evaluators directory exists.""" + self.evaluators_dir.mkdir(parents=True, exist_ok=True) + + def list_evaluators(self) -> list[dict[str, Any]]: + """List all evaluator instances from evaluations/evaluators/. + + Returns: + List of evaluator data dictionaries with id, description, and type info. + """ + evaluators = [] + if not self.evaluators_dir.exists(): + return evaluators + + for json_file in self.evaluators_dir.glob("*.json"): + try: + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + evaluators.append({ + "id": data.get("id", json_file.stem), + "description": data.get("description", ""), + "evaluatorTypeId": data.get("evaluatorTypeId", ""), + "file_path": str(json_file), + "data": data, + }) + except (json.JSONDecodeError, IOError): + continue + + return evaluators + + def load_evaluator(self, evaluator_id: str) -> dict[str, Any] | None: + """Load an evaluator by ID. + + Args: + evaluator_id: The evaluator ID to load. + + Returns: + The evaluator data dictionary, or None if not found. + """ + if not self.evaluators_dir.exists(): + return None + + # Try direct file match first + json_file = self.evaluators_dir / f"{evaluator_id}.json" + if json_file.exists(): + try: + with open(json_file, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + return None + + # Search by ID field in all files + for json_file in self.evaluators_dir.glob("*.json"): + try: + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + if data.get("id") == evaluator_id: + return data + except (json.JSONDecodeError, IOError): + continue + + return None + + def create_evaluator( + self, + evaluator_id: str, + evaluator_type_id: str, + description: str, + config: dict[str, Any], + default_criteria: dict[str, Any] | None = None, + ) -> str: + """Create a new evaluator JSON file. + + Args: + evaluator_id: Unique ID for the evaluator. + evaluator_type_id: The evaluator type ID (e.g., 'uipath-exact-match'). + description: Description of the evaluator. + config: Configuration dictionary for the evaluator. + default_criteria: Optional default evaluation criteria. + + Returns: + The path to the created file. + + Raises: + ValueError: If the evaluator type is invalid or ID already exists. 
+ """ + self.ensure_directory() + + # Validate evaluator type + type_def = get_evaluator_type(evaluator_type_id) + if not type_def: + raise ValueError(f"Invalid evaluator type: {evaluator_type_id}") + + # Check if ID already exists + existing_file = self.evaluators_dir / f"{evaluator_id}.json" + if existing_file.exists(): + raise ValueError(f"Evaluator with ID '{evaluator_id}' already exists") + + # Build evaluator config + evaluator_config = config.copy() if config else {} + if default_criteria: + evaluator_config["defaultEvaluationCriteria"] = default_criteria + + # Build the evaluator JSON structure + evaluator_data = { + "version": "1.0", + "id": evaluator_id, + "description": description, + "evaluatorTypeId": evaluator_type_id, + "evaluatorConfig": evaluator_config, + } + + # Write to file + file_path = self.evaluators_dir / f"{evaluator_id}.json" + with open(file_path, "w", encoding="utf-8") as f: + json.dump(evaluator_data, f, indent=2) + + return str(file_path) + + def save_evaluator(self, evaluator_id: str, data: dict[str, Any]) -> str: + """Save an evaluator directly with the given data. + + Args: + evaluator_id: The evaluator ID. + data: The full evaluator data to save. + + Returns: + The path to the saved file. + """ + self.ensure_directory() + + # Build the evaluator JSON structure + evaluator_data = { + "version": "1.0", + "id": data.get("id", evaluator_id), + "description": data.get("description", ""), + "evaluatorTypeId": data.get("evaluatorTypeId", ""), + "evaluatorConfig": data.get("evaluatorConfig", data.get("config", {})), + } + if data.get("defaultCriteria"): + evaluator_data["evaluatorConfig"]["defaultEvaluationCriteria"] = data[ + "defaultCriteria" + ] + + # Find existing file for this evaluator + file_path = None + if self.evaluators_dir.exists(): + # First check direct filename match + direct_path = self.evaluators_dir / f"{evaluator_id}.json" + if direct_path.exists(): + file_path = direct_path + else: + # Search by ID field in all files + for json_file in self.evaluators_dir.glob("*.json"): + try: + with open(json_file, "r", encoding="utf-8") as f: + existing_data = json.load(f) + if existing_data.get("id") == evaluator_id: + file_path = json_file + break + except (json.JSONDecodeError, IOError): + continue + + # Default to ID-based filename if no existing file found + if file_path is None: + file_path = self.evaluators_dir / f"{evaluator_id}.json" + + with open(file_path, "w", encoding="utf-8") as f: + json.dump(evaluator_data, f, indent=2) + return str(file_path) + + def delete_evaluator(self, evaluator_id: str) -> bool: + """Delete an evaluator by ID. + + Args: + evaluator_id: The evaluator ID to delete. + + Returns: + True if deleted, False if not found. 
+ """ + file_path = self.evaluators_dir / f"{evaluator_id}.json" + if file_path.exists(): + file_path.unlink() + return True + return False diff --git a/src/uipath/dev/ui/panels/__init__.py b/src/uipath/dev/ui/panels/__init__.py index 43c16b3..4d86f0b 100644 --- a/src/uipath/dev/ui/panels/__init__.py +++ b/src/uipath/dev/ui/panels/__init__.py @@ -1,11 +1,22 @@ """UiPath Dev Console panels module initialization.""" -from uipath.dev.ui.panels.new_run_panel import NewRunPanel -from uipath.dev.ui.panels.run_details_panel import RunDetailsPanel -from uipath.dev.ui.panels.run_history_panel import RunHistoryPanel +from uipath.dev.ui.panels.evals import EvalRunDetailsPanel +from uipath.dev.ui.panels.evaluators import EvaluatorFormPanel +from uipath.dev.ui.panels.runs import NewRunPanel, RunDetailsPanel +from uipath.dev.ui.panels.sidebar import ( + EvalSetsTab, + EvaluatorsTab, + RunHistoryTab, + SidebarPanel, +) __all__ = [ + "EvalRunDetailsPanel", + "EvalSetsTab", + "EvaluatorFormPanel", + "EvaluatorsTab", "NewRunPanel", "RunDetailsPanel", - "RunHistoryPanel", + "RunHistoryTab", + "SidebarPanel", ] diff --git a/src/uipath/dev/ui/panels/evals/__init__.py b/src/uipath/dev/ui/panels/evals/__init__.py new file mode 100644 index 0000000..91c00ce --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/__init__.py @@ -0,0 +1,17 @@ +"""Evals panel components.""" + +from uipath.dev.ui.panels.evals.assign_evaluator_panel import AssignEvaluatorPanel +from uipath.dev.ui.panels.evals.eval_set_create_panel import EvalSetCreatePanel +from uipath.dev.ui.panels.evals.evaluation_edit_panel import EvaluationEditPanel +from uipath.dev.ui.panels.evals.evaluations_list_panel import EvaluationsListPanel +from uipath.dev.ui.panels.evals.run_details_panel import EvalRunDetailsPanel +from uipath.dev.ui.panels.evals.runs_list_panel import EvalRunsListPanel + +__all__ = [ + "AssignEvaluatorPanel", + "EvalRunDetailsPanel", + "EvalRunsListPanel", + "EvalSetCreatePanel", + "EvaluationEditPanel", + "EvaluationsListPanel", +] diff --git a/src/uipath/dev/ui/panels/evals/assign_evaluator_panel.py b/src/uipath/dev/ui/panels/evals/assign_evaluator_panel.py new file mode 100644 index 0000000..838f8ff --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/assign_evaluator_panel.py @@ -0,0 +1,204 @@ +"""Assign evaluator panel for assigning evaluators to an eval set.""" + +import json +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, ScrollableContainer, Vertical +from textual.widgets import Button, Checkbox, Collapsible, Static + +from uipath.dev.services.evaluator_service import EvaluatorService + + +class AssignEvaluatorPanel(Vertical): + """Panel for assigning evaluators to an eval set.""" + + def __init__( + self, + evaluator_service: EvaluatorService | None = None, + on_assign: Callable[[list[str]], None] | None = None, + on_close: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the assign evaluator panel. + + Args: + evaluator_service: Service for loading evaluator definitions. + on_assign: Callback with selected evaluator IDs when assign is clicked. + on_close: Callback when close button is clicked. 
+ """ + super().__init__(**kwargs) + self.evaluator_service = evaluator_service or EvaluatorService() + self.on_assign = on_assign + self.on_close = on_close + + self._unassigned_evaluators: list[dict[str, Any]] = [] + self._selected_evaluators: set[str] = set() + self._eval_set_data: dict[str, Any] | None = None + self._eval_set_path: str = "" + + def compose(self) -> ComposeResult: + """Compose the panel UI.""" + yield ScrollableContainer(id="assign-evaluator-content") + + def set_data( + self, + unassigned: list[dict[str, Any]], + eval_set_data: dict[str, Any] | None, + eval_set_path: str = "", + ) -> None: + """Set the data and populate the form. + + Args: + unassigned: List of evaluator dicts that are not yet assigned. + eval_set_data: The parent eval set data. + eval_set_path: Path to the eval set file (for saving). + """ + self._unassigned_evaluators = unassigned + self._eval_set_data = eval_set_data + self._eval_set_path = eval_set_path + self._selected_evaluators = set() + self.call_later(self._populate_form) + + def get_updated_eval_set_data(self) -> dict[str, Any] | None: + """Get the updated eval set data after assign.""" + return self._eval_set_data + + async def _populate_form(self) -> None: + """Populate the form with unassigned evaluators.""" + try: + content = self.query_one("#assign-evaluator-content", ScrollableContainer) + await content.remove_children() + + if not self._unassigned_evaluators: + await content.mount( + Static( + "[dim]All evaluators are already assigned.[/dim]", + classes="helper-text", + ) + ) + return + + # Close button + await content.mount( + Button("✕", id="close-assign-evaluator-btn", classes="close-btn") + ) + + # Info text + await content.mount( + Static("[bold]Assign Evaluators[/bold]", classes="detail-row") + ) + await content.mount( + Static( + "[dim]Select evaluators to assign to this eval set. 
Expand to see details.[/dim]", + classes="helper-text", + ) + ) + + # Add checkbox + collapsible for each unassigned evaluator + for ev in self._unassigned_evaluators: + ev_id = ev.get("id", "") + if ev_id: + await self._add_evaluator_info_section(content, ev_id) + + # Assign button + await content.mount( + Button( + "Assign Selected", + id="do-assign-evaluator-btn", + variant="primary", + classes="small-btn", + ) + ) + + except Exception: + pass + + async def _add_evaluator_info_section( + self, content: ScrollableContainer, ev_ref: str + ) -> None: + """Add a collapsible section showing evaluator info.""" + # Load evaluator info + ev_data = self.evaluator_service.load_evaluator(ev_ref) + ev_desc = ev_data.get("description", "") if ev_data else "" + + # Get default criteria from evaluator definition + default_criteria = {} + if ev_data: + ev_config = ev_data.get("evaluatorConfig", {}) + default_criteria = ev_config.get("defaultEvaluationCriteria", {}) + + # Build info widgets list - description and default criteria (read-only) + info_children: list = [] + if ev_desc: + info_children.append(Static(f"[dim]{ev_desc}[/dim]", classes="ev-desc")) + + if default_criteria: + info_children.append( + Static("[bold]DEFAULT CRITERIA[/bold]", classes="ev-criteria-header") + ) + for key, value in default_criteria.items(): + value_str = ( + json.dumps(value) + if isinstance(value, (dict, list)) + else str(value or "") + ) + info_children.append( + Static(f"[dim]{key}:[/dim] {value_str}", classes="ev-field-lbl") + ) + else: + info_children.append( + Static("[dim]No default criteria[/dim]", classes="helper-text") + ) + + # Row with checkbox + collapsible + ev_row = Horizontal(classes="ev-row") + await content.mount(ev_row) + + is_selected = ev_ref in self._selected_evaluators + await ev_row.mount( + Checkbox( + "", value=is_selected, id=f"assign-ev-{ev_ref}", classes="ev-cb" + ) + ) + + # Collapsible with evaluator name as title + collapsible = Collapsible( + *info_children, + title=ev_ref, + collapsed=True, + id=f"assign-ev-collapse-{ev_ref}", + classes="ev-collapse", + ) + await ev_row.mount(collapsible) + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + + if btn_id == "close-assign-evaluator-btn": + event.stop() + if self.on_close: + self.on_close() + elif btn_id == "do-assign-evaluator-btn": + event.stop() + await self._handle_assign() + + async def on_checkbox_changed(self, event: Checkbox.Changed) -> None: + """Track selected evaluators.""" + cb_id = event.checkbox.id or "" + if cb_id.startswith("assign-ev-"): + ev_ref = cb_id.replace("assign-ev-", "") + if event.value: + self._selected_evaluators.add(ev_ref) + else: + self._selected_evaluators.discard(ev_ref) + + async def _handle_assign(self) -> None: + """Emit selected evaluator IDs to parent.""" + if not self._selected_evaluators: + self.app.notify("Please select at least one evaluator", severity="error") + return + + if self.on_assign: + self.on_assign(list(self._selected_evaluators)) diff --git a/src/uipath/dev/ui/panels/evals/eval_set_create_panel.py b/src/uipath/dev/ui/panels/evals/eval_set_create_panel.py new file mode 100644 index 0000000..fa3522f --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/eval_set_create_panel.py @@ -0,0 +1,122 @@ +"""Eval set creation panel for creating new eval sets.""" + +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import ScrollableContainer, Vertical +from textual.widgets 
import Button, Input, Static + + +class EvalSetCreatePanel(Vertical): + """Panel for creating a new eval set.""" + + def __init__( + self, + on_create: Callable[[dict[str, Any]], None] | None = None, + on_close: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the eval set create panel. + + Args: + on_create: Callback with form data when create is clicked. + on_close: Callback when close button is clicked. + """ + super().__init__(**kwargs) + self.on_create = on_create + self.on_close = on_close + + def compose(self) -> ComposeResult: + """Compose the panel UI.""" + yield ScrollableContainer(id="eval-set-create-content") + + def on_mount(self) -> None: + """Handle mount event - populate the form.""" + self.call_later(self._populate_form) + + async def _populate_form(self) -> None: + """Populate the form.""" + try: + content = self.query_one("#eval-set-create-content", ScrollableContainer) + await content.remove_children() + + # Close button + await content.mount( + Button("✕", id="close-eval-set-create-btn", classes="close-btn") + ) + + # Form fields + await content.mount( + Static("[bold]Eval Set ID *[/bold]", classes="detail-row") + ) + await content.mount( + Input(placeholder="my-eval-set", id="new-eval-set-id-input") + ) + + await content.mount(Static("[bold]Name[/bold]", classes="detail-row")) + await content.mount( + Input(placeholder="My Evaluation Set", id="new-eval-set-name-input") + ) + + await content.mount( + Static( + "[dim]After creation, assign evaluators and add evaluations from the tabs.[/dim]", + classes="helper-text", + ) + ) + + # Create button + await content.mount( + Button( + "Create Eval Set", + id="create-eval-set-btn", + variant="primary", + classes="small-btn", + ) + ) + + except Exception: + pass + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + + if btn_id == "close-eval-set-create-btn": + event.stop() + if self.on_close: + self.on_close() + elif btn_id == "create-eval-set-btn": + event.stop() + await self._handle_create() + + async def _handle_create(self) -> None: + """Collect form data and emit to parent.""" + try: + id_input = self.query_one("#new-eval-set-id-input", Input) + name_input = self.query_one("#new-eval-set-name-input", Input) + + eval_set_id = id_input.value.strip() + if not eval_set_id: + self.app.notify("Please enter an eval set ID", severity="error") + return + + name = name_input.value.strip() or eval_set_id + + # Build form data to emit + form_data: dict[str, Any] = { + "eval_set_id": eval_set_id, + "name": name, + "evaluator_refs": [], + "evaluations": [], + } + + if self.on_create: + self.on_create(form_data) + + except Exception as e: + self.app.notify(f"Error collecting form data: {e}", severity="error") + + def reset(self) -> None: + """Reset the form to initial state.""" + self.call_later(self._populate_form) diff --git a/src/uipath/dev/ui/panels/evals/evaluation_edit_panel.py b/src/uipath/dev/ui/panels/evals/evaluation_edit_panel.py new file mode 100644 index 0000000..2e0bd5c --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/evaluation_edit_panel.py @@ -0,0 +1,358 @@ +"""Evaluation edit panel for adding/editing evaluations.""" + +import json +import uuid +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, ScrollableContainer, Vertical +from textual.widgets import Button, Checkbox, Collapsible, Input, Static, TextArea + +from uipath.dev.services.evaluator_service 
import EvaluatorService + + +class EvaluationEditPanel(Vertical): + """Panel for adding or editing an evaluation.""" + + def __init__( + self, + evaluator_service: EvaluatorService | None = None, + on_save: Callable[[dict[str, Any]], None] | None = None, + on_delete: Callable[[str], None] | None = None, + on_close: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the evaluation edit panel. + + Args: + evaluator_service: Service for loading evaluator definitions. + on_save: Callback with form data when save is clicked. + on_delete: Callback with eval_id when delete is clicked. + on_close: Callback when close button is clicked. + """ + super().__init__(**kwargs) + self.evaluator_service = evaluator_service or EvaluatorService() + self.on_save = on_save + self.on_delete = on_delete + self.on_close = on_close + + self._evaluation_data: dict[str, Any] | None = None + self._eval_set_data: dict[str, Any] | None = None + self._eval_set_path: str = "" + self._is_add_mode: bool = True + self._enabled_evaluators: set[str] = set() + self._evaluator_criterias: dict[str, dict[str, Any]] = {} + + def compose(self) -> ComposeResult: + """Compose the panel UI.""" + yield ScrollableContainer(id="evaluation-edit-content") + + def set_data( + self, + evaluation: dict[str, Any] | None, + eval_set_data: dict[str, Any] | None, + eval_set_path: str = "", + ) -> None: + """Set the evaluation data and populate the form. + + Args: + evaluation: The evaluation data to edit, or None for add mode. + eval_set_data: The parent eval set data containing evaluatorRefs. + eval_set_path: Path to the eval set file (for saving). + """ + self._evaluation_data = evaluation + self._eval_set_data = eval_set_data + self._eval_set_path = eval_set_path + self._is_add_mode = evaluation is None + + # Initialize enabled evaluators and criteria from evaluation data + self._enabled_evaluators = set() + self._evaluator_criterias = {} + + if evaluation: + # Load existing criteria from evaluation + # null/None means disabled, non-null (even empty {}) means enabled + eval_criteria = evaluation.get("evaluationCriterias", {}) + for ev_ref, criteria in eval_criteria.items(): + if criteria is not None: + self._enabled_evaluators.add(ev_ref) + self._evaluator_criterias[ev_ref] = criteria + + self.call_later(self._populate_form) + + def get_updated_eval_set_data(self) -> dict[str, Any] | None: + """Get the updated eval set data after save/delete.""" + return self._eval_set_data + + async def _populate_form(self) -> None: + """Populate the form with current data.""" + try: + content = self.query_one("#evaluation-edit-content", ScrollableContainer) + await content.remove_children() + + # Close button + await content.mount( + Button("✕", id="close-evaluation-edit-btn", classes="close-btn") + ) + + # Name field + await content.mount(Static("[bold]Name *[/bold]", classes="detail-row")) + name_value = ( + self._evaluation_data.get("name", "") + if self._evaluation_data + else "" + ) + await content.mount( + Input( + value=name_value, + placeholder="Test case name" if self._is_add_mode else "", + id="evaluation-name-input", + classes="detail-input", + ) + ) + + # Inputs (JSON) + await content.mount(Static("[bold]Input[/bold]", classes="detail-row")) + await content.mount( + Static( + "[dim]Provide the input data for this evaluation as a JSON object.[/dim]", + classes="helper-text", + ) + ) + + if self._evaluation_data: + inputs_json = json.dumps( + self._evaluation_data.get("inputs", {}), indent=2 + ) + else: + inputs_json = '{\n "query": 
"your input here"\n}' + + await content.mount( + TextArea(inputs_json, id="evaluation-inputs-textarea", classes="detail-json") + ) + + # Evaluator Criteria section + await content.mount( + Static("[bold]Evaluator Criteria[/bold]", classes="detail-row") + ) + await content.mount( + Static( + "[dim]Enable evaluators and configure their criteria. Expand each evaluator to customize values.[/dim]", + classes="helper-text", + ) + ) + + # Add collapsible sections for each evaluator + if self._eval_set_data: + evaluator_refs = self._eval_set_data.get("evaluatorRefs", []) + for ev_ref in evaluator_refs: + await self._add_evaluator_criteria_section(content, ev_ref) + + # Action buttons + buttons = [ + Button( + "Save", + id="save-evaluation-btn", + variant="primary", + classes="small-btn", + ) + ] + + if not self._is_add_mode: + buttons.append( + Button( + "Remove", + id="delete-evaluation-btn", + variant="error", + classes="small-btn", + ) + ) + + await content.mount(Horizontal(*buttons, classes="list-actions-row")) + + except Exception: + pass + + async def _add_evaluator_criteria_section( + self, content: ScrollableContainer, ev_ref: str + ) -> None: + """Add a collapsible section for an evaluator's criteria.""" + # Load evaluator info + ev_data = self.evaluator_service.load_evaluator(ev_ref) + ev_desc = ev_data.get("description", "") if ev_data else "" + + # Get current criteria for this evaluator + current_criteria = self._evaluator_criterias.get(ev_ref, {}) + is_enabled = ev_ref in self._enabled_evaluators + + # Get default criteria from evaluator definition + default_criteria: dict[str, Any] = {} + if ev_data: + ev_config = ev_data.get("evaluatorConfig", {}) or {} + default_criteria = ev_config.get("defaultEvaluationCriteria", {}) or {} + + # Merge with current criteria + merged_criteria = {**default_criteria, **current_criteria} + + # Build criteria widgets list + criteria_children: list = [] + if ev_desc: + criteria_children.append(Static(f"[dim]{ev_desc}[/dim]", classes="ev-desc")) + criteria_children.append( + Static("[bold]EVALUATION CRITERIA[/bold]", classes="ev-criteria-header") + ) + + if merged_criteria: + for key, value in merged_criteria.items(): + value_str = ( + json.dumps(value) + if isinstance(value, (dict, list)) + else str(value or "") + ) + criteria_children.append(Static(f"{key}:", classes="ev-field-lbl")) + criteria_children.append( + Input( + value=value_str, + id=f"ev-criteria-{ev_ref}-{key}", + classes="ev-field-input", + ) + ) + else: + criteria_children.append( + Static("[dim]No criteria fields[/dim]", classes="helper-text") + ) + + # Row with checkbox + collapsible + ev_row = Horizontal(classes="ev-row") + await content.mount(ev_row) + + await ev_row.mount( + Checkbox("", value=is_enabled, id=f"ev-enable-{ev_ref}", classes="ev-cb") + ) + + # Collapsible with evaluator name as title + collapsible = Collapsible( + *criteria_children, + title=ev_ref, + collapsed=True, + id=f"ev-collapse-{ev_ref}", + classes="ev-collapse", + ) + await ev_row.mount(collapsible) + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + + if btn_id == "close-evaluation-edit-btn": + event.stop() + if self.on_close: + self.on_close() + elif btn_id == "save-evaluation-btn": + event.stop() + await self._handle_save() + elif btn_id == "delete-evaluation-btn": + event.stop() + await self._handle_delete() + + async def on_checkbox_changed(self, event: Checkbox.Changed) -> None: + """Track enabled/disabled evaluators.""" 
+ cb_id = event.checkbox.id or "" + if cb_id.startswith("ev-enable-"): + ev_ref = cb_id.replace("ev-enable-", "") + if event.value: + self._enabled_evaluators.add(ev_ref) + else: + self._enabled_evaluators.discard(ev_ref) + + async def _handle_save(self) -> None: + """Collect form data and emit to parent for persistence.""" + if not self._eval_set_data: + return + + try: + name_input = self.query_one("#evaluation-name-input", Input) + inputs_area = self.query_one("#evaluation-inputs-textarea", TextArea) + + name = name_input.value.strip() + if not name: + self.app.notify("Name is required", severity="error") + return + + # Parse inputs JSON + try: + inputs = json.loads(inputs_area.text) + except json.JSONDecodeError: + self.app.notify("Invalid JSON in inputs", severity="error") + return + + # Collect evaluation criteria for all evaluators + evaluation_criteria: dict[str, dict[str, Any] | None] = {} + + evaluator_refs = self._eval_set_data.get("evaluatorRefs", []) + for ev_ref in evaluator_refs: + # Disabled evaluators get null value + if ev_ref not in self._enabled_evaluators: + evaluation_criteria[ev_ref] = None + continue + + # Load evaluator to get criteria keys + ev_data = self.evaluator_service.load_evaluator(ev_ref) + default_criteria: dict[str, Any] = {} + if ev_data: + ev_config = ev_data.get("evaluatorConfig", {}) or {} + default_criteria = ev_config.get("defaultEvaluationCriteria", {}) or {} + + # Get existing criteria + current_criteria = self._evaluator_criterias.get(ev_ref, {}) + merged_criteria = {**default_criteria, **current_criteria} + + # Collect from inputs + criteria: dict[str, Any] = {} + for key in merged_criteria.keys(): + try: + inp = self.query_one(f"#ev-criteria-{ev_ref}-{key}", Input) + val_str = inp.value.strip() + try: + criteria[key] = json.loads(val_str) + except json.JSONDecodeError: + criteria[key] = val_str + except Exception: + criteria[key] = merged_criteria.get(key) + + evaluation_criteria[ev_ref] = criteria + + # Build form data to emit + form_data: dict[str, Any] = { + "name": name, + "inputs": inputs, + "evaluationCriterias": evaluation_criteria, + "is_add_mode": self._is_add_mode, + "eval_set_path": self._eval_set_path, + } + + # Include existing ID for edit mode + if not self._is_add_mode and self._evaluation_data: + form_data["id"] = self._evaluation_data.get("id") + else: + # Generate ID for new evaluation + form_data["id"] = str(uuid.uuid4())[:8] + + # Emit data to parent for persistence + if self.on_save: + self.on_save(form_data) + + except Exception as e: + self.app.notify(f"Error collecting form data: {e}", severity="error") + + async def _handle_delete(self) -> None: + """Emit eval_id to parent for deletion.""" + if not self._evaluation_data: + return + + eval_id = self._evaluation_data.get("id") + if not eval_id: + return + + if self.on_delete: + self.on_delete(eval_id) diff --git a/src/uipath/dev/ui/panels/evals/evaluations_list_panel.py b/src/uipath/dev/ui/panels/evals/evaluations_list_panel.py new file mode 100644 index 0000000..8bf4ef2 --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/evaluations_list_panel.py @@ -0,0 +1,91 @@ +"""Evaluations list panel for displaying evaluations in an eval set.""" + +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, Vertical +from textual.widgets import Button, ListItem, ListView, Static + + +class EvaluationsListPanel(Vertical): + """Panel that owns the evaluations ListView and its population logic.""" + + def __init__( + self, + 
on_add_clicked: Callable[[], None] | None = None, + on_assign_clicked: Callable[[], None] | None = None, + on_evaluation_selected: Callable[[dict[str, Any]], None] | None = None, + **kwargs, + ): + """Initialize the evaluations list panel. + + Args: + on_add_clicked: Callback when "+ Add" button is clicked + on_assign_clicked: Callback when "+ Assign" button is clicked + on_evaluation_selected: Callback when an evaluation is selected + """ + super().__init__(**kwargs) + self.on_add_clicked = on_add_clicked + self.on_assign_clicked = on_assign_clicked + self.on_evaluation_selected = on_evaluation_selected + self._eval_set_data: dict[str, Any] | None = None + + def compose(self) -> ComposeResult: + """Compose the evaluations list UI.""" + with Horizontal(classes="list-actions-row"): + yield Button( + "+ Add", + id="add-evaluation-btn", + variant="default", + classes="tiny-btn", + ) + yield Button( + "+ Assign", + id="assign-evaluator-btn", + variant="default", + classes="tiny-btn", + ) + yield ListView(id="evaluations-list", classes="eval-items-list") + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + if event.button.id == "add-evaluation-btn" and self.on_add_clicked: + self.on_add_clicked() + elif event.button.id == "assign-evaluator-btn" and self.on_assign_clicked: + self.on_assign_clicked() + + async def on_list_view_selected(self, event: ListView.Selected) -> None: + """Handle list item selection.""" + if event.list_view.id == "evaluations-list" and event.item: + eval_data = getattr(event.item, "eval_data", None) + if eval_data and self.on_evaluation_selected: + self.on_evaluation_selected(eval_data) + + def set_eval_set_data(self, data: dict[str, Any] | None) -> None: + """Set the eval set data and refresh the list.""" + self._eval_set_data = data + self.call_later(self._populate_list) + + async def _populate_list(self) -> None: + """Populate the evaluations list from current data.""" + try: + list_view = self.query_one("#evaluations-list", ListView) + await list_view.clear() + + if not self._eval_set_data: + return + + evaluations = self._eval_set_data.get("evaluations", []) + for eval_item in evaluations: + eval_id = eval_item.get("id", "") + eval_name = eval_item.get("name", eval_id) + + item = ListItem(Static(f"{eval_name}"), classes="eval-list-item") + item.eval_data = eval_item # type: ignore + await list_view.append(item) + except Exception: + pass + + async def refresh_list(self) -> None: + """Refresh the list.""" + await self._populate_list() diff --git a/src/uipath/dev/ui/panels/evals/run_details_panel.py b/src/uipath/dev/ui/panels/evals/run_details_panel.py new file mode 100644 index 0000000..7002a0f --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/run_details_panel.py @@ -0,0 +1,394 @@ +"""Panel for displaying evaluation run details, traces, and logs.""" + +from textual.app import ComposeResult +from textual.containers import Container, Horizontal, Vertical +from textual.reactive import reactive +from textual.widgets import RichLog, TabbedContent, TabPane, Tree +from textual.widgets.tree import TreeNode + +from uipath.dev.models.eval_run import EvalRun +from uipath.dev.models.messages import LogMessage, TraceMessage + + +class SpanDetailsDisplay(Container): + """Widget to display details of a selected span.""" + + def compose(self) -> ComposeResult: + """Compose the UI layout.""" + yield RichLog( + id="eval-span-details", + max_lines=1000, + highlight=True, + markup=True, + classes="span-detail-log", + ) + + def 
show_span_details(self, trace_msg: TraceMessage): + """Display detailed information about a trace span.""" + details_log = self.query_one("#eval-span-details", RichLog) + details_log.clear() + + details_log.write(f"[bold cyan]Span: {trace_msg.span_name}[/bold cyan]") + + details_log.write("") + + color_map = { + "started": "blue", + "running": "yellow", + "completed": "green", + "failed": "red", + "error": "red", + } + color = color_map.get(trace_msg.status.lower(), "white") + details_log.write(f"Status: [{color}]{trace_msg.status.upper()}[/{color}]") + + details_log.write( + f"Started: [dim]{trace_msg.timestamp.strftime('%H:%M:%S.%f')[:-3]}[/dim]" + ) + + if trace_msg.duration_ms is not None: + details_log.write( + f"Duration: [yellow]{trace_msg.duration_ms:.2f}ms[/yellow]" + ) + + if trace_msg.attributes: + details_log.write("") + details_log.write("[bold]Attributes:[/bold]") + for key, value in trace_msg.attributes.items(): + details_log.write(f" {key}: {value}") + + details_log.write("") + + details_log.write(f"[dim]Trace ID: {trace_msg.trace_id}[/dim]") + details_log.write(f"[dim]Span ID: {trace_msg.span_id}[/dim]") + details_log.write(f"[dim]Run ID: {trace_msg.run_id}[/dim]") + + if trace_msg.parent_span_id: + details_log.write(f"[dim]Parent Span: {trace_msg.parent_span_id}[/dim]") + + +class EvalRunDetailsPanel(Vertical): + """Panel showing details, traces, and logs for selected eval run with tabbed interface.""" + + current_run: reactive[EvalRun | None] = reactive(None) + + def __init__(self, **kwargs): + """Initialize EvalRunDetailsPanel.""" + super().__init__(**kwargs) + self.span_tree_nodes = {} + self.current_run = None + + def compose(self) -> ComposeResult: + """Compose the UI layout.""" + with TabbedContent(id="eval-run-details-tabs"): + with TabPane("Details", id="eval-details-tab"): + yield RichLog( + id="eval-details-log", + max_lines=1000, + highlight=True, + markup=True, + classes="detail-log", + ) + + with TabPane("Traces", id="eval-traces-tab"): + with Horizontal(classes="traces-content"): + # Left side - Span tree + with Vertical( + classes="spans-tree-section", id="eval-spans-tree-container" + ): + yield Tree("Trace", id="eval-spans-tree", classes="spans-tree") + + # Right side - Span details + with Vertical(classes="span-details-section"): + yield SpanDetailsDisplay(id="eval-span-details-display") + + with TabPane("Logs", id="eval-logs-tab"): + yield RichLog( + id="eval-logs-log", + max_lines=1000, + highlight=True, + markup=True, + classes="detail-log", + ) + + def watch_current_run( + self, old_value: EvalRun | None, new_value: EvalRun | None + ): + """Watch for changes to the current run.""" + if new_value is not None: + if old_value != new_value: + self.current_run = new_value + self.show_run(new_value) + + def update_run(self, eval_run: EvalRun) -> None: + """Update the displayed run information.""" + self.current_run = eval_run + self._show_run_details(eval_run) + self._rebuild_spans_tree() + + def show_run(self, run: EvalRun): + """Display details, traces, and logs for a specific run.""" + self._show_run_details(run) + + logs_log = self.query_one("#eval-logs-log", RichLog) + logs_log.clear() + for log in run.logs: + self.add_log(log) + + self._rebuild_spans_tree() + + def switch_tab(self, tab_id: str) -> None: + """Switch to a specific tab by id.""" + tabbed = self.query_one(TabbedContent) + tabbed.active = tab_id + + def clear(self) -> None: + """Clear the panel.""" + self.current_run = None + try: + details_log = self.query_one("#eval-details-log", 
RichLog) + details_log.clear() + logs_log = self.query_one("#eval-logs-log", RichLog) + logs_log.clear() + spans_tree = self.query_one("#eval-spans-tree", Tree) + spans_tree.root.remove_children() + except Exception: + pass + + def _show_run_details(self, run: EvalRun): + """Display detailed information about the run in the Details tab.""" + details_log = self.query_one("#eval-details-log", RichLog) + details_log.clear() + + details_log.write(f"[bold cyan]Run ID: {run.id}[/bold cyan]") + details_log.write("") + + status_color_map = { + "pending": "grey50", + "running": "yellow", + "completed": "green", + "failed": "red", + } + color = status_color_map.get(run.status.lower(), "white") + details_log.write( + f"[bold]Status:[/bold] [{color}]{run.status.upper()}[/{color}]" + ) + + details_log.write( + f"[bold]Started:[/bold] [dim]{run.start_time.strftime('%Y-%m-%d %H:%M:%S')}[/dim]" + ) + + if run.end_time: + details_log.write( + f"[bold]Ended:[/bold] [dim]{run.end_time.strftime('%Y-%m-%d %H:%M:%S')}[/dim]" + ) + + details_log.write(f"[bold]Duration:[/bold] [yellow]{run.duration}[/yellow]") + + details_log.write("") + + # Eval set info + details_log.write(f"[bold]Eval Set:[/bold] {run.eval_set_path}") + details_log.write(f"[bold]Entrypoint:[/bold] {run.entrypoint}") + details_log.write(f"[bold]Workers:[/bold] {run.workers}") + + details_log.write("") + + if run.status == "completed": + details_log.write( + f"[bold]Overall Score:[/bold] [cyan]{run.overall_score * 100:.1f}%[/cyan]" + ) + details_log.write( + f"[bold]Total Evaluations:[/bold] {run.total_evaluations}" + ) + + details_log.write("") + details_log.write("[bold]EVALUATOR SCORES:[/bold]") + details_log.write("[dim]" + "=" * 50 + "[/dim]") + + for ev_id, score in run.evaluator_scores.items(): + score_pct = f"{score * 100:.1f}%" + score_color = "green" if score == 1.0 else ("yellow" if score >= 0.5 else "red") + details_log.write(f" [{score_color}]{ev_id}: {score_pct}[/{score_color}]") + + details_log.write("") + details_log.write("[bold]EVALUATION RESULTS:[/bold]") + details_log.write("[dim]" + "=" * 50 + "[/dim]") + + for eval_result in run.evaluation_results: + passed = eval_result.passed + icon = "✓" if passed else "✗" + icon_color = "green" if passed else "red" + details_log.write( + f"[{icon_color}]{icon}[/{icon_color}] [bold]{eval_result.eval_name}[/bold]" + ) + + for ev_result in eval_result.evaluator_results: + result_score = f"{ev_result.score * 100:.0f}%" + result_color = "green" if ev_result.score == 1.0 else "red" + details_log.write( + f" [{result_color}]{ev_result.evaluator_name}: {result_score}[/{result_color}]" + ) + if ev_result.justification: + justification = ev_result.justification + truncated = ( + f"{justification[:100]}..." 
+ if len(justification) > 100 + else justification + ) + details_log.write(f" [dim]{truncated}[/dim]") + + elif run.status == "failed" and run.error: + details_log.write("[bold red]ERROR:[/bold red]") + details_log.write("[dim]" + "=" * 50 + "[/dim]") + if run.error.code: + details_log.write(f"[red]Code: {run.error.code}[/red]") + details_log.write(f"[red]Title: {run.error.title}[/red]") + if run.error.detail: + details_log.write(f"[red]\n{run.error.detail}[/red]") + + elif run.status == "running": + details_log.write("[bold yellow]Running...[/bold yellow]") + + def _rebuild_spans_tree(self): + """Rebuild the spans tree from current run's traces.""" + spans_tree = self.query_one("#eval-spans-tree", Tree) + if spans_tree is None or spans_tree.root is None: + return + + spans_tree.root.remove_children() + + self.span_tree_nodes.clear() + + if not self.current_run or not self.current_run.traces: + return + + self._build_spans_tree(self.current_run.traces) + + # Expand the root "Trace" node + spans_tree.root.expand() + + def _build_spans_tree(self, trace_messages: list[TraceMessage]): + """Build the spans tree from trace messages.""" + spans_tree = self.query_one("#eval-spans-tree", Tree) + root = spans_tree.root + + # Filter out spans without parents (artificial root spans) + spans_by_id = { + msg.span_id: msg for msg in trace_messages if msg.parent_span_id is not None + } + + # Build parent-to-children mapping once upfront + children_by_parent: dict[str, list[TraceMessage]] = {} + for msg in spans_by_id.values(): + if msg.parent_span_id: + if msg.parent_span_id not in children_by_parent: + children_by_parent[msg.parent_span_id] = [] + children_by_parent[msg.parent_span_id].append(msg) + + # Find root spans (parent doesn't exist in our filtered data) + root_spans = [ + msg + for msg in trace_messages + if msg.parent_span_id and msg.parent_span_id not in spans_by_id + ] + + # Build tree recursively for each root span + for root_span in sorted(root_spans, key=lambda x: x.timestamp): + self._add_span_with_children(root, root_span, children_by_parent) + + def _add_span_with_children( + self, + parent_node: TreeNode[str], + trace_msg: TraceMessage, + children_by_parent: dict[str, list[TraceMessage]], + ): + """Recursively add a span and all its children.""" + color_map = { + "started": "🔵", + "running": "🟡", + "completed": "🟢", + "failed": "🔴", + "error": "🔴", + } + status_icon = color_map.get(trace_msg.status.lower(), "⚪") + duration_str = ( + f" ({trace_msg.duration_ms:.1f}ms)" if trace_msg.duration_ms else "" + ) + label = f"{status_icon} {trace_msg.span_name}{duration_str}" + + node = parent_node.add(label) + node.data = trace_msg.span_id + self.span_tree_nodes[trace_msg.span_id] = node + node.expand() + + # Get children from prebuilt mapping - O(1) lookup + children = children_by_parent.get(trace_msg.span_id, []) + for child in sorted(children, key=lambda x: x.timestamp): + self._add_span_with_children(node, child, children_by_parent) + + def on_tree_node_selected(self, event: Tree.NodeSelected[str]) -> None: + """Handle span selection in the tree.""" + # Check if this is our spans tree + spans_tree = self.query_one("#eval-spans-tree", Tree) + if event.control != spans_tree: + return + + # Get the selected span data + if hasattr(event.node, "data") and event.node.data: + span_id = event.node.data + # Find the trace in current_run.traces + trace_msg = None + if self.current_run: + for trace in self.current_run.traces: + if trace.span_id == span_id: + trace_msg = trace + break + + if trace_msg: 
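+                # Render the matched span in the right-hand span details display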
+ span_details_display = self.query_one( + "#eval-span-details-display", SpanDetailsDisplay + ) + span_details_display.show_span_details(trace_msg) + + def add_trace(self, trace_msg: TraceMessage): + """Add trace to current run if it matches.""" + if not self.current_run or trace_msg.run_id != self.current_run.id: + return + + # Rebuild the tree to include new trace + self._rebuild_spans_tree() + + def add_log(self, log_msg: LogMessage): + """Add log to current run if it matches.""" + if not self.current_run or log_msg.run_id != self.current_run.id: + return + + color_map = { + "DEBUG": "dim cyan", + "INFO": "blue", + "WARN": "yellow", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "bold red", + } + + color = color_map.get(log_msg.level.upper(), "white") + timestamp_str = log_msg.timestamp.strftime("%H:%M:%S") + level_short = log_msg.level[:4].upper() + + logs_log = self.query_one("#eval-logs-log", RichLog) + if isinstance(log_msg.message, str): + log_text = ( + f"[dim]{timestamp_str}[/dim] " + f"[{color}]{level_short}[/{color}] " + f"{log_msg.message}" + ) + logs_log.write(log_text) + else: + logs_log.write(log_msg.message) + + def refresh_display(self): + """Refresh the display with current run data.""" + if self.current_run: + self.show_run(self.current_run) diff --git a/src/uipath/dev/ui/panels/evals/runs_list_panel.py b/src/uipath/dev/ui/panels/evals/runs_list_panel.py new file mode 100644 index 0000000..4aff2ce --- /dev/null +++ b/src/uipath/dev/ui/panels/evals/runs_list_panel.py @@ -0,0 +1,153 @@ +"""Eval runs list panel for displaying eval runs for an eval set.""" + +from pathlib import Path +from typing import Callable + +from rich.text import Text +from textual.app import ComposeResult +from textual.containers import Vertical +from textual.widgets import ListItem, ListView, Static + +from uipath.dev.models.eval_run import EvalRun +from uipath.dev.services.eval_run_service import EvalRunService + + +class EvalRunsListPanel(Vertical): + """Panel that owns the eval runs ListView and its population logic.""" + + def __init__( + self, + eval_run_service: EvalRunService | None = None, + on_run_selected: Callable[[EvalRun], None] | None = None, + **kwargs, + ): + """Initialize the eval runs list panel. 
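+        Runs are listed for the eval set selected via set_eval_set().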
+ + Args: + eval_run_service: The eval run service for loading runs + on_run_selected: Callback when a run is selected + """ + super().__init__(**kwargs) + self.eval_run_service = eval_run_service + self.on_run_selected = on_run_selected + self._selected_eval_set: str = "" + + def compose(self) -> ComposeResult: + """Compose the eval runs list UI.""" + yield ListView(id="eval-runs-list", classes="eval-items-list") + + def on_mount(self) -> None: + """Set up periodic refresh for running items.""" + self.set_interval(5.0, self._refresh_running_items) + + async def on_list_view_selected(self, event: ListView.Selected) -> None: + """Handle list item selection.""" + if event.list_view.id == "eval-runs-list" and event.item: + eval_run = getattr(event.item, "eval_run", None) + if eval_run and self.on_run_selected: + self.on_run_selected(eval_run) + + def set_eval_set(self, eval_set_path: str) -> None: + """Set the selected eval set and refresh the list.""" + self._selected_eval_set = eval_set_path + self.call_later(self._populate_list) + + def set_eval_run_service(self, eval_run_service: EvalRunService) -> None: + """Set the eval run service.""" + self.eval_run_service = eval_run_service + + async def _populate_list(self) -> None: + """Populate the eval runs list from the service.""" + if not self._selected_eval_set or not self.eval_run_service: + return + + try: + eval_set_name = Path(self._selected_eval_set).stem + runs = self.eval_run_service.get_runs_for_eval_set(eval_set_name) + + list_view = self.query_one("#eval-runs-list", ListView) + await list_view.clear() + + for run in runs: + item = ListItem( + Static(self._format_run_label(run)), + classes=f"eval-list-item run-{run.status}", + ) + item.eval_run_id = run.id # type: ignore + item.eval_run = run # type: ignore + await list_view.append(item) + except Exception: + pass + + async def refresh_list(self) -> None: + """Public method to refresh the list.""" + await self._populate_list() + + def add_run(self, run: EvalRun) -> None: + """Add a new run to the list (at the top).""" + self.call_later(self._populate_list) + + def update_run(self, run: EvalRun) -> None: + """Update an existing run's display (targeted update, no full rebuild).""" + self.call_later(lambda: self._update_list_item(run)) + + async def _update_list_item(self, run: EvalRun) -> None: + """Update only the ListItem corresponding to a single run.""" + try: + list_view = self.query_one("#eval-runs-list", ListView) + except Exception: + return + + for item in list(list_view.children): + run_id = getattr(item, "eval_run_id", None) + if run_id != run.id: + continue + + # Update label with formatted display_name + try: + static = item.query_one(Static) + static.update(self._format_run_label(run)) + except Exception: + continue + + # Update the stored run reference + item.eval_run = run # type: ignore + + # Update status-related CSS class + new_classes = [cls for cls in item.classes if not cls.startswith("run-")] + new_classes.append(f"run-{run.status}") + item.set_classes(" ".join(new_classes)) + break + + def _format_run_label(self, run: EvalRun) -> Text: + """Format the label for a run item.""" + base = run.display_name + + if not isinstance(base, Text): + base = Text(str(base)) + + text = base.copy() + + if not text.plain.startswith(" "): + text = Text(" ") + text + + return text + + def _refresh_running_items(self) -> None: + """Refresh display names for running items only.""" + if not self.eval_run_service: + return + + # Get all runs for current eval set + if not 
self._selected_eval_set: + return + + eval_set_name = Path(self._selected_eval_set).stem + runs = self.eval_run_service.get_runs_for_eval_set(eval_set_name) + + if not any(run.status == "running" for run in runs): + return + + for run in runs: + if run.status == "running": + self.call_later(lambda r=run: self._update_list_item(r)) diff --git a/src/uipath/dev/ui/panels/evaluators/__init__.py b/src/uipath/dev/ui/panels/evaluators/__init__.py new file mode 100644 index 0000000..17d9b29 --- /dev/null +++ b/src/uipath/dev/ui/panels/evaluators/__init__.py @@ -0,0 +1,7 @@ +"""Evaluators panel components.""" + +from uipath.dev.ui.panels.evaluators.evaluator_form_panel import EvaluatorFormPanel + +__all__ = [ + "EvaluatorFormPanel", +] diff --git a/src/uipath/dev/ui/panels/evaluators/evaluator_form_panel.py b/src/uipath/dev/ui/panels/evaluators/evaluator_form_panel.py new file mode 100644 index 0000000..0efd9ac --- /dev/null +++ b/src/uipath/dev/ui/panels/evaluators/evaluator_form_panel.py @@ -0,0 +1,611 @@ +"""Panel for creating and editing evaluators.""" + +import json +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, ScrollableContainer, Vertical +from textual.widgets import ( + Button, + Checkbox, + Input, + ListItem, + ListView, + Static, + TextArea, +) + +from uipath.dev.models.evaluator_types import EVALUATOR_TYPES + + +class EvaluatorFormPanel(Vertical): + """Panel for evaluator creation and editing.""" + + def __init__( + self, + on_save: Callable[[dict[str, Any]], None] | None = None, + on_delete: Callable[[str], None] | None = None, + on_close: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the evaluator form panel. + + Args: + on_save: Callback with form data when save/create is clicked. + on_delete: Callback with evaluator_id when delete is clicked. + on_close: Callback when close/cancel is clicked. 
+ """ + super().__init__(**kwargs) + self.on_save = on_save + self.on_delete = on_delete + self.on_close = on_close + + # Current state + self._mode: str = "templates" # templates, create, edit + self._selected_type_id: str = "" + self._selected_type_def: dict[str, Any] = {} + self._editing_evaluator_id: str = "" + self._editing_evaluator_data: dict[str, Any] = {} + self._external_container: ScrollableContainer | None = None + + def compose(self) -> ComposeResult: + """Compose the panel UI.""" + yield ScrollableContainer(id="evaluator-form-content") + + async def on_mount(self) -> None: + """Show templates list on mount.""" + await self.show_templates() + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + + if btn_id == "create-evaluator-btn": + await self._handle_create() + elif btn_id == "save-edited-evaluator-btn": + await self._handle_save_edit() + elif btn_id == "delete-edited-evaluator-btn": + await self._handle_delete() + elif btn_id == "close-evaluator-edit-btn": + if self.on_close: + self.on_close() + await self.show_templates() + + async def show_templates(self) -> None: + """Show the evaluator templates list.""" + self._mode = "templates" + self._external_container = None # Clear external container reference + content = self.query_one("#evaluator-form-content", ScrollableContainer) + await self._clear_content(content) + + # Header + await content.mount( + Static("[bold]Select a Template[/bold]", classes="panel-title") + ) + await content.mount( + Static( + "[dim]Choose an evaluator type to create from[/dim]", + classes="helper-text", + ) + ) + + # Templates list + templates_list = ListView( + id="evaluator-templates-list", classes="eval-items-list" + ) + await content.mount(templates_list) + + for type_id, type_def in EVALUATOR_TYPES.items(): + type_name = type_def.get("name", type_id) + category = type_def.get("category", "") + + display = f"{type_name}" + if category: + display += f" [{category}]" + + item = ListItem(Static(display), classes="eval-list-item") + item.type_id = type_id # type: ignore + item.type_def = type_def # type: ignore + await templates_list.append(item) + + async def show_create_form(self, type_id: str, type_def: dict[str, Any]) -> None: + """Show the create form for a specific evaluator type in this panel's content.""" + self._mode = "create" + self._selected_type_id = type_id + self._selected_type_def = type_def + self._external_container = None # Not using external container for this mode + + content = self.query_one("#evaluator-form-content", ScrollableContainer) + await self._clear_content(content) + await self._populate_create_form(content, type_id, type_def) + self.refresh(layout=True) + + async def populate_create_form_in_container( + self, + container: ScrollableContainer, + type_id: str, + type_def: dict[str, Any], + ) -> None: + """Populate the create form into an external container (e.g., right panel). + + This stores the type info for later use by get_create_form_data(). 
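+        The container reference is also stored so that get_create_form_data() reads widget values from that container instead of this panel's own content.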
+ """ + self._mode = "create" + self._selected_type_id = type_id + self._selected_type_def = type_def + self._external_container = container # Store reference for form data collection + await self._populate_create_form(container, type_id, type_def) + + async def _populate_create_form( + self, + content: ScrollableContainer, + type_id: str, + type_def: dict[str, Any], + ) -> None: + """Populate create form widgets into a container.""" + type_name = type_def.get("name", type_id) + description = type_def.get("description", "") + config_fields = type_def.get("config_fields", []) + criteria_fields = type_def.get("criteria_fields", []) + + # Close button + await content.mount(Button("✕", id="close-right-panel-btn", classes="close-btn")) + + # Header + await content.mount( + Static(f"[bold]Create {type_name}[/bold]", classes="panel-title") + ) + if description: + await content.mount( + Static(f"[dim]{description}[/dim]", classes="helper-text") + ) + + # Evaluator ID + await content.mount( + Static("[bold]Evaluator ID *[/bold]", classes="detail-row") + ) + await content.mount(Input(placeholder="my-evaluator", id="new-evaluator-id")) + + # Description + await content.mount(Static("[bold]Description[/bold]", classes="detail-row")) + await content.mount( + Input( + placeholder="Description of this evaluator", id="new-evaluator-desc" + ) + ) + + # Configuration fields + if config_fields: + await content.mount( + Static("[bold]Configuration[/bold]", classes="section-header") + ) + await self._mount_config_fields(content, config_fields, prefix="config") + + # Criteria fields + if criteria_fields: + await content.mount( + Static( + "[bold]Default Evaluation Criteria[/bold]", + classes="section-header", + ) + ) + await content.mount( + Static( + "[dim]These values will be used as defaults when running evaluations.[/dim]", + classes="helper-text", + ) + ) + await self._mount_criteria_fields(content, criteria_fields, prefix="criteria") + + # Create button + button_row = Horizontal(classes="button-row") + await content.mount(button_row) + await button_row.mount( + Button( + "Create Evaluator", + id="create-evaluator-btn", + variant="primary", + classes="small-btn", + ) + ) + + async def show_edit_form(self, evaluator_id: str, ev_data: dict[str, Any]) -> None: + """Show the edit form for an existing evaluator. + + Args: + evaluator_id: The evaluator ID. + ev_data: The evaluator data (loaded by parent). 
+ """ + self._mode = "edit" + self._editing_evaluator_id = evaluator_id + self._editing_evaluator_data = ev_data + self._external_container = None # Not using external container for edit mode + + content = self.query_one("#evaluator-form-content", ScrollableContainer) + await self._clear_content(content) + + # Get type info + ev_type = ev_data.get("evaluatorTypeId", "") + type_def = EVALUATOR_TYPES.get(ev_type, {}) + type_name = type_def.get("name", ev_type) + type_description = type_def.get("description", "") + config_fields = type_def.get("config_fields", []) + criteria_fields = type_def.get("criteria_fields", []) + + # Get current values + ev_config = ev_data.get("evaluatorConfig", ev_data.get("config", {})) + default_criteria = ev_config.get("defaultEvaluationCriteria", {}) + + # Header + await content.mount( + Static(f"[bold]Edit {type_name}[/bold]", classes="panel-title") + ) + if type_description: + await content.mount( + Static(f"[dim]{type_description}[/dim]", classes="helper-text") + ) + + # Evaluator ID (read-only) + await content.mount( + Static("[bold]Evaluator ID *[/bold]", classes="detail-row") + ) + await content.mount(Static(f"[dim]{evaluator_id}[/dim]", classes="detail-row")) + + # Description + description = ev_data.get("description", "") + await content.mount(Static("[bold]Description[/bold]", classes="detail-row")) + await content.mount( + Input( + value=description, + placeholder="Description of this evaluator", + id="edit-evaluator-desc", + ) + ) + + # Configuration fields + if config_fields: + await content.mount( + Static("[bold]Configuration[/bold]", classes="section-header") + ) + await self._mount_config_fields( + content, config_fields, prefix="edit-config", + current_values=ev_config + ) + + # Criteria fields + if criteria_fields: + await content.mount( + Static( + "[bold]Default Evaluation Criteria[/bold]", + classes="section-header", + ) + ) + await content.mount( + Static( + "[dim]These values will be used as defaults when running evaluations.[/dim]", + classes="helper-text", + ) + ) + await self._mount_criteria_fields( + content, criteria_fields, prefix="edit-criteria", + current_values=default_criteria + ) + + # Buttons + button_row = Horizontal(classes="button-row") + await content.mount(button_row) + await button_row.mount( + Button("Save", id="save-edited-evaluator-btn", variant="primary", classes="small-btn") + ) + await button_row.mount( + Button("Delete", id="delete-edited-evaluator-btn", variant="error", classes="small-btn") + ) + await button_row.mount( + Button("Close", id="close-evaluator-edit-btn", variant="default", classes="small-btn") + ) + + self.refresh(layout=True) + + def show_placeholder(self) -> None: + """Show placeholder text.""" + self._mode = "templates" + try: + content = self.query_one("#evaluator-form-content", ScrollableContainer) + self._clear_content_sync(content) + content.mount( + Static( + "[dim]Click '+ Add' to create a new evaluator, or select an existing evaluator from the list.[/dim]", + classes="helper-text", + ) + ) + except Exception: + pass + + def get_selected_type(self) -> tuple[str, dict[str, Any]]: + """Get the currently selected type ID and definition.""" + return self._selected_type_id, self._selected_type_def + + # ========================================================================= + # Form Data Collection + # ========================================================================= + + def get_create_form_data(self) -> dict[str, Any] | None: + """Collect data from the create form.""" + try: + # Query 
from external container if form is in right panel, otherwise from self + query_target = self._external_container if self._external_container else self + evaluator_id = query_target.query_one("#new-evaluator-id", Input).value.strip() + description = query_target.query_one("#new-evaluator-desc", Input).value.strip() + + if not evaluator_id: + self.app.notify("Please enter an evaluator ID", severity="error") + return None + + config = self._collect_config_values("config") + config["name"] = evaluator_id + criteria = self._collect_criteria_values("criteria") + + return { + "evaluator_id": evaluator_id, + "type_id": self._selected_type_id, + "description": description, + "config": config, + "default_criteria": criteria if criteria else None, + } + except Exception as e: + self.app.notify(f"Error collecting form data: {e}", severity="error") + return None + + def get_edit_form_data(self) -> dict[str, Any] | None: + """Collect data from the edit form.""" + try: + description = self.query_one("#edit-evaluator-desc", Input).value.strip() + config = self._collect_config_values("edit-config") + config["name"] = self._editing_evaluator_id + criteria = self._collect_criteria_values("edit-criteria") + + return { + "evaluator_id": self._editing_evaluator_id, + "type_id": self._editing_evaluator_data.get("evaluatorTypeId", ""), + "description": description, + "config": config, + "default_criteria": criteria if criteria else {}, + } + except Exception as e: + self.app.notify(f"Error collecting form data: {e}", severity="error") + return None + + async def _clear_content(self, content: ScrollableContainer) -> None: + """Clear all children from container.""" + for child in list(content.children): + child.remove() + + def _clear_content_sync(self, content: ScrollableContainer) -> None: + """Clear all children from container (sync version).""" + for child in list(content.children): + child.remove() + + async def _mount_config_fields( + self, + content: ScrollableContainer, + fields: list[dict[str, Any]], + prefix: str, + current_values: dict[str, Any] | None = None, + fallback_values: dict[str, Any] | None = None, + ) -> None: + """Mount configuration fields.""" + current_values = current_values or {} + fallback_values = fallback_values or {} + + for field in fields: + field_name = field.get("name", "") + if field_name == "name": + continue + + field_label = field.get("label", field_name) + field_type = field.get("type", "string") + field_default = field.get("default", "") + field_desc = field.get("description", "") + required = field.get("required", False) + + current_value = current_values.get( + field_name, fallback_values.get(field_name, field_default) + ) + + label_text = f"[bold]{field_label}{'*' if required else ''}[/bold]" + await content.mount(Static(label_text, classes="detail-row")) + if field_desc: + await content.mount( + Static(f"[dim]{field_desc}[/dim]", classes="helper-text") + ) + + if field_type == "boolean": + await content.mount( + Checkbox( + field_label, + value=bool(current_value), + id=f"{prefix}-{field_name}", + ) + ) + else: + value_str = ( + json.dumps(current_value) + if isinstance(current_value, (dict, list)) + else str(current_value or "") + ) + if "\n" in value_str or field_type in ("text", "textarea"): + await content.mount( + TextArea( + value_str, + id=f"{prefix}-{field_name}", + classes="detail-json", + ) + ) + else: + await content.mount( + Input( + value=value_str, + placeholder=field_label, + id=f"{prefix}-{field_name}", + ) + ) + + async def _mount_criteria_fields( + 
self, + content: ScrollableContainer, + fields: list[dict[str, Any]], + prefix: str, + current_values: dict[str, Any] | None = None, + fallback_values: dict[str, Any] | None = None, + ) -> None: + """Mount criteria fields.""" + current_values = current_values or {} + fallback_values = fallback_values or {} + + for field in fields: + field_name = field.get("name", "") + field_label = field.get("label", field_name) + field_type = field.get("type", "string") + required = field.get("required", False) + + current_value = current_values.get( + field_name, fallback_values.get(field_name, "") + ) + + label_text = f"[bold]{field_label}{'*' if required else ''}[/bold]" + await content.mount(Static(label_text, classes="detail-row")) + + if field_type == "boolean": + await content.mount( + Checkbox( + field_label, + value=bool(current_value), + id=f"{prefix}-{field_name}", + ) + ) + else: + value_str = ( + json.dumps(current_value) + if isinstance(current_value, (dict, list)) + else str(current_value or "") + ) + await content.mount( + Input( + value=value_str, + placeholder=field_label, + id=f"{prefix}-{field_name}", + ) + ) + + def _collect_config_values(self, prefix: str) -> dict[str, Any]: + """Collect configuration values from form.""" + config: dict[str, Any] = {} + type_def = self._selected_type_def if self._mode == "create" else EVALUATOR_TYPES.get( + self._editing_evaluator_data.get("evaluatorTypeId", ""), {} + ) + config_fields = type_def.get("config_fields", []) + + # Query from external container if form is in right panel (create mode), otherwise from self + query_target = self._external_container if (self._mode == "create" and self._external_container) else self + + for field in config_fields: + field_name = field.get("name", "") + field_type = field.get("type", "string") + if field_name == "name": + continue + + try: + widget_id = f"#{prefix}-{field_name}" + if field_type == "boolean": + checkbox = query_target.query_one(widget_id, Checkbox) + config[field_name] = checkbox.value + else: + try: + textarea = query_target.query_one(widget_id, TextArea) + value = textarea.text.strip() + except Exception: + input_widget = query_target.query_one(widget_id, Input) + value = input_widget.value.strip() + if value: + if value.startswith("{") or value.startswith("["): + try: + config[field_name] = json.loads(value) + except json.JSONDecodeError: + config[field_name] = value + else: + config[field_name] = value + except Exception: + pass + + return config + + def _collect_criteria_values(self, prefix: str) -> dict[str, Any]: + """Collect criteria values from form.""" + criteria: dict[str, Any] = {} + type_def = self._selected_type_def if self._mode == "create" else EVALUATOR_TYPES.get( + self._editing_evaluator_data.get("evaluatorTypeId", ""), {} + ) + criteria_fields = type_def.get("criteria_fields", []) + + # Query from external container if form is in right panel (create mode), otherwise from self + query_target = self._external_container if (self._mode == "create" and self._external_container) else self + + for field in criteria_fields: + field_name = field.get("name", "") + field_type = field.get("type", "string") + + try: + widget_id = f"#{prefix}-{field_name}" + if field_type == "boolean": + checkbox = query_target.query_one(widget_id, Checkbox) + criteria[field_name] = checkbox.value + else: + input_widget = query_target.query_one(widget_id, Input) + value = input_widget.value.strip() + if value: + if value.startswith("{") or value.startswith("["): + try: + criteria[field_name] = 
json.loads(value) + except json.JSONDecodeError: + criteria[field_name] = value + else: + criteria[field_name] = value + except Exception: + pass + + return criteria + + async def _handle_create(self) -> None: + """Collect form data and emit to parent for persistence.""" + form_data = self.get_create_form_data() + if not form_data: + return + + # Mark as create mode + form_data["is_create"] = True + + # Emit data to parent for persistence + if self.on_save: + self.on_save(form_data) + + async def _handle_save_edit(self) -> None: + """Collect form data and emit to parent for persistence.""" + form_data = self.get_edit_form_data() + if not form_data: + return + + # Mark as edit mode + form_data["is_create"] = False + + # Emit data to parent for persistence + if self.on_save: + self.on_save(form_data) + + async def _handle_delete(self) -> None: + """Emit evaluator_id to parent for deletion.""" + if not self._editing_evaluator_id: + return + + # Emit evaluator_id to parent for persistence + if self.on_delete: + self.on_delete(self._editing_evaluator_id) diff --git a/src/uipath/dev/ui/panels/runs/__init__.py b/src/uipath/dev/ui/panels/runs/__init__.py new file mode 100644 index 0000000..b33635b --- /dev/null +++ b/src/uipath/dev/ui/panels/runs/__init__.py @@ -0,0 +1,9 @@ +"""Runs panel components for execution runs.""" + +from uipath.dev.ui.panels.runs.new_run_panel import NewRunPanel +from uipath.dev.ui.panels.runs.run_details_panel import RunDetailsPanel + +__all__ = [ + "NewRunPanel", + "RunDetailsPanel", +] diff --git a/src/uipath/dev/ui/panels/new_run_panel.py b/src/uipath/dev/ui/panels/runs/new_run_panel.py similarity index 78% rename from src/uipath/dev/ui/panels/new_run_panel.py rename to src/uipath/dev/ui/panels/runs/new_run_panel.py index 07cd53b..8f79f09 100644 --- a/src/uipath/dev/ui/panels/new_run_panel.py +++ b/src/uipath/dev/ui/panels/runs/new_run_panel.py @@ -6,13 +6,12 @@ from textual.app import ComposeResult from textual.containers import Container, Horizontal, Vertical from textual.reactive import reactive -from textual.widgets import Button, Select, TabbedContent, TabPane +from textual.widgets import Button, Select from uipath.runtime import UiPathRuntimeFactoryProtocol, UiPathRuntimeProtocol +from uipath.dev.ui.panels._json_schema import mock_json_from_schema from uipath.dev.ui.widgets.json_input import JsonInput -from ._json_schema import mock_json_from_schema - class NewRunPanel(Container): """Panel for creating new runs with a Select entrypoint selector.""" @@ -37,41 +36,40 @@ def __init__( def compose(self) -> ComposeResult: """Compose the UI layout.""" - with TabbedContent(): - with TabPane("New run", id="new-tab"): - with Vertical(): - yield Select( - options=[], - id="entrypoint-select", - allow_blank=True, - ) - - yield JsonInput( - text=self.initial_input, - language="json", - id="json-input", - classes="input-field json-input", - ) - - with Horizontal(classes="run-actions"): - yield Button( - "▶ Run", - id="execute-btn", - variant="primary", - classes="action-btn", - ) - yield Button( - "⏸ Debug", - id="debug-btn", - variant="primary", - classes="action-btn", - ) - yield Button( - "💬 Chat", - id="chat-btn", - variant="primary", - classes="action-btn", - ) + with Vertical(): + yield Select( + options=[], + id="entrypoint-select", + allow_blank=True, + ) + + yield JsonInput( + text=self.initial_input, + language="json", + id="json-input", + classes="input-field json-input", + ) + + with Horizontal(classes="run-actions"): + yield Button( + "▶ Run", + 
id="execute-btn", + variant="primary", + classes="action-btn", + ) + yield Button( + "⏸ Debug", + id="debug-btn", + variant="primary", + classes="action-btn", + ) + yield Button( + "💬 Chat", + id="chat-btn", + variant="primary", + classes="action-btn", + ) + async def on_mount(self) -> None: """Discover entrypoints once, and set the first as default.""" @@ -148,6 +146,9 @@ async def _load_schema_and_update_input(self, entrypoint: str) -> None: async def on_select_changed(self, event: Select.Changed) -> None: """Update JSON input when user selects an entrypoint.""" + if event.select.id != "entrypoint-select": + return + new_entrypoint = cast(str, event.value) if event.value else "" # Only load schema if the entrypoint actually changed diff --git a/src/uipath/dev/ui/panels/run_details_panel.py b/src/uipath/dev/ui/panels/runs/run_details_panel.py similarity index 100% rename from src/uipath/dev/ui/panels/run_details_panel.py rename to src/uipath/dev/ui/panels/runs/run_details_panel.py diff --git a/src/uipath/dev/ui/panels/sidebar/__init__.py b/src/uipath/dev/ui/panels/sidebar/__init__.py new file mode 100644 index 0000000..339449c --- /dev/null +++ b/src/uipath/dev/ui/panels/sidebar/__init__.py @@ -0,0 +1,13 @@ +"""Sidebar panel components for the UiPath Developer Console.""" + +from uipath.dev.ui.panels.sidebar.eval_sets_tab import EvalSetsTab +from uipath.dev.ui.panels.sidebar.evaluators_tab import EvaluatorsTab +from uipath.dev.ui.panels.sidebar.run_history_tab import RunHistoryTab +from uipath.dev.ui.panels.sidebar.sidebar_panel import SidebarPanel + +__all__ = [ + "EvalSetsTab", + "EvaluatorsTab", + "RunHistoryTab", + "SidebarPanel", +] diff --git a/src/uipath/dev/ui/panels/sidebar/eval_sets_tab.py b/src/uipath/dev/ui/panels/sidebar/eval_sets_tab.py new file mode 100644 index 0000000..34f4f69 --- /dev/null +++ b/src/uipath/dev/ui/panels/sidebar/eval_sets_tab.py @@ -0,0 +1,348 @@ +"""Eval sets tab component.""" + +import json +import os +from pathlib import Path +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, ScrollableContainer, Vertical +from textual.reactive import reactive +from textual.widgets import ( + Button, + Checkbox, + Input, + Label, + Select, +) +from uipath.eval._helpers import auto_discover_entrypoint + +from uipath.dev.models.eval_run import EvalRun +from uipath.dev.services.eval_run_service import EvalRunService +from uipath.dev.services.eval_set_service import EvalSetService +from uipath.dev.services.evaluator_service import EvaluatorService + + +class EvalSetsTab(Horizontal): + """Tab component for eval set configuration and run options.""" + + selected_eval_set = reactive("") + selected_entrypoint = reactive("") + + def __init__( + self, + evaluator_service: EvaluatorService | None = None, + eval_set_service: EvalSetService | None = None, + eval_run_service: EvalRunService | None = None, + on_evaluation_selected: Callable[[dict[str, Any]], None] | None = None, + on_add_evaluation_clicked: Callable[[], None] | None = None, + on_assign_evaluator_clicked: Callable[[], None] | None = None, + on_create_eval_set_clicked: Callable[[], None] | None = None, + on_eval_set_changed: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the eval sets tab. + + Args: + evaluator_service: Service for evaluator CRUD operations. + eval_set_service: Service for eval set CRUD operations. + eval_run_service: Service for eval run execution. + on_evaluation_selected: Callback when an evaluation is selected. 
+ on_add_evaluation_clicked: Callback when add evaluation is clicked. + on_assign_evaluator_clicked: Callback when assign evaluator is clicked. + on_create_eval_set_clicked: Callback when create eval set is clicked. + on_eval_set_changed: Callback when eval set selection changes. + """ + super().__init__(**kwargs) + self.evaluator_service = evaluator_service or EvaluatorService() + self.eval_set_service = eval_set_service or EvalSetService() + self.eval_run_service = eval_run_service or EvalRunService() + + # Action-specific callbacks + self.on_evaluation_selected = on_evaluation_selected + self.on_add_evaluation_clicked = on_add_evaluation_clicked + self.on_assign_evaluator_clicked = on_assign_evaluator_clicked + self.on_create_eval_set_clicked = on_create_eval_set_clicked + self.on_eval_set_changed = on_eval_set_changed + + # Initialize entrypoints + self.entrypoints: list[str] = [] + self.entrypoint_paths: list[str] = [] + self._load_entrypoints() + + # Initialize eval sets + self.eval_sets: list[dict[str, Any]] = [] + self.eval_sets_paths: list[str] = [] + self._refresh_eval_sets() + + # Current eval set data (needed for creating runs) + self.current_eval_set_data: dict[str, Any] | None = None + + # Run options + self.workers_count = 1 + self.no_report = False + self.enable_mocker_cache = False + self.report_coverage = False + self.output_file = "" + self.eval_ids = "" + self.eval_set_run_id = "" + + def _load_entrypoints(self) -> None: + """Load available entrypoints.""" + try: + json_path = os.path.join(os.getcwd(), "entry-points.json") + with open(json_path, "r") as f: + data = json.load(f) + self.entrypoints = data.get("entryPoints", []) + self.entrypoint_paths = [ep["filePath"] for ep in self.entrypoints] + self.selected_entrypoint = ( + self.entrypoint_paths[0] if self.entrypoint_paths else "" + ) + except (FileNotFoundError, json.JSONDecodeError, KeyError): + self.selected_entrypoint = "" + + def _refresh_eval_sets(self) -> None: + """Refresh the list of available eval sets.""" + self.eval_sets = self.eval_set_service.list_eval_sets() + self.eval_sets_paths = [es["file_path"] for es in self.eval_sets] + if self.eval_sets_paths and not self.selected_eval_set: + self.selected_eval_set = self.eval_sets_paths[0] + + def compose(self) -> ComposeResult: + """Compose the eval sets tab UI for sidebar.""" + with Vertical(classes="eval-sets-main"): + # Action buttons at top + with Horizontal(classes="list-actions-row"): + yield Button( + "▶ Run", id="eval-run-btn", variant="primary", classes="small-btn" + ) + yield Button( + "+ Add", + id="create-eval-set-btn", + variant="default", + classes="small-btn", + ) + + # Scrollable form content + with ScrollableContainer(classes="eval-sets-form"): + # Eval Set selection + yield Label("Eval Set:", classes="field-label-inline") + yield Select( + options=[(Path(p).stem, p) for p in self.eval_sets_paths] + if self.eval_sets_paths + else [], + id="eval-set-dropdown", + value=self.selected_eval_set + if self.eval_sets_paths + else Select.BLANK, + allow_blank=True, + prompt="Select eval set...", + ) + + # Entrypoint selection + yield Label("Entrypoint:", classes="field-label-inline") + yield Select( + options=[(p, p) for p in self.entrypoint_paths] + if self.entrypoint_paths + else [], + id="entrypoint-dropdown", + value=self.selected_entrypoint + if self.entrypoint_paths + else Select.BLANK, + allow_blank=True, + prompt="Select entrypoint...", + ) + + # Run options + yield Label("Workers:", classes="field-label-inline") + yield Input(value="1", 
id="workers-input", classes="opt-input-small") + yield Checkbox( + "No Report", id="no-report-checkbox", classes="opt-checkbox" + ) + yield Checkbox( + "Mocker Cache", + id="enable-mocker-cache-checkbox", + classes="opt-checkbox", + ) + yield Checkbox( + "Coverage", id="report-coverage-checkbox", classes="opt-checkbox" + ) + + async def on_mount(self) -> None: + """Handle mount event.""" + if self.selected_eval_set: + await self._load_eval_set_data() + + async def on_select_changed(self, event: Select.Changed) -> None: + """Handle select changes.""" + if event.select.id == "eval-set-dropdown": + self.selected_eval_set = str(event.value) if event.value else "" + await self._load_eval_set_data() + if self.on_eval_set_changed: + self.on_eval_set_changed() + elif event.select.id == "entrypoint-dropdown": + self.selected_entrypoint = str(event.value) if event.value else "" + + async def on_input_changed(self, event: Input.Changed) -> None: + """Handle input changes.""" + if event.input.id == "workers-input": + try: + self.workers_count = int(event.value) if event.value else 1 + except ValueError: + self.workers_count = 1 + elif event.input.id == "eval-ids-input": + self.eval_ids = event.value + elif event.input.id == "eval-set-run-id-input": + self.eval_set_run_id = event.value + elif event.input.id == "output-file-input": + self.output_file = event.value + + async def on_checkbox_changed(self, event: Checkbox.Changed) -> None: + """Handle checkbox changes.""" + checkbox_id = event.checkbox.id or "" + if checkbox_id == "no-report-checkbox": + self.no_report = event.value + elif checkbox_id == "enable-mocker-cache-checkbox": + self.enable_mocker_cache = event.value + elif checkbox_id == "report-coverage-checkbox": + self.report_coverage = event.value + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + if btn_id == "create-eval-set-btn": + await self._show_create_eval_set_form() + + def get_run_options(self) -> dict[str, Any]: + """Get all options needed to create an EvalRun.""" + evaluator_refs = [] + if self.current_eval_set_data: + evaluator_refs = self.current_eval_set_data.get("evaluatorRefs", []) + + eval_ids_list: list[str] = [] + if self.eval_ids: + eval_ids_list = [ + id.strip() for id in self.eval_ids.split(",") if id.strip() + ] + + return { + "eval_set_path": self.selected_eval_set, + "entrypoint": self.selected_entrypoint or auto_discover_entrypoint(), + "evaluator_refs": evaluator_refs, + "workers": self.workers_count, + "no_report": self.no_report, + "enable_mocker_cache": self.enable_mocker_cache, + "report_coverage": self.report_coverage, + "output_file": self.output_file or None, + "eval_ids": eval_ids_list, + "eval_set_run_id": self.eval_set_run_id or None, + } + + def get_current_eval_set_data(self) -> dict[str, Any] | None: + """Get the current eval set data.""" + return self.current_eval_set_data + + def set_current_eval_set_data(self, data: dict[str, Any] | None) -> None: + """Set the current eval set data.""" + self.current_eval_set_data = data + + def get_selected_eval_set_path(self) -> str: + """Get the selected eval set file path.""" + return self.selected_eval_set + + async def _load_eval_set_data(self) -> None: + """Load the selected eval set data.""" + if not self.selected_eval_set: + return + + eval_set_name = Path(self.selected_eval_set).stem + self.current_eval_set_data = self.eval_set_service.load_eval_set(eval_set_name) + + def refresh_eval_sets(self) -> None: + """Public method 
to refresh the eval sets list.""" + self._refresh_eval_sets() + try: + select = self.query_one("#eval-set-dropdown", Select) + options = [(Path(p).stem, p) for p in self.eval_sets_paths] + select.set_options(options) + if self.eval_sets_paths: + select.value = self.eval_sets_paths[0] + except Exception: + pass + + def select_eval_set(self, eval_set_id: str) -> None: + """Select an eval set by ID. + + Args: + eval_set_id: The eval set ID (file stem without extension). + """ + # Find the matching path + for path in self.eval_sets_paths: + if Path(path).stem == eval_set_id: + self.selected_eval_set = path + try: + select = self.query_one("#eval-set-dropdown", Select) + select.value = path + except Exception: + pass + # Load the data for the selected eval set + self.call_later(self._load_eval_set_data) + if self.on_eval_set_changed: + self.on_eval_set_changed() + break + + async def show_evaluation_detail(self, eval_data: dict[str, Any]) -> None: + """Notify parent that an evaluation was selected.""" + if self.on_evaluation_selected: + self.on_evaluation_selected(eval_data) + + async def _show_create_eval_set_form(self) -> None: + """Notify parent to show create eval set form.""" + if self.on_create_eval_set_clicked: + self.on_create_eval_set_clicked() + + async def show_add_evaluation_form(self) -> None: + """Notify parent to show add evaluation form.""" + if not self.current_eval_set_data: + self.app.notify("Please select an eval set first", severity="error") + return + if self.on_add_evaluation_clicked: + self.on_add_evaluation_clicked() + + async def show_assign_evaluator_form(self) -> None: + """Notify parent to show assign evaluator form.""" + if not self.current_eval_set_data: + self.app.notify("Please select an eval set first", severity="error") + return + if self.on_assign_evaluator_clicked: + self.on_assign_evaluator_clicked() + + def create_eval_run(self) -> EvalRun: + """Create an EvalRun object from current selections.""" + eval_set_path = self.selected_eval_set + entrypoint = self.selected_entrypoint or auto_discover_entrypoint() + + evaluator_refs = [] + if self.current_eval_set_data: + evaluator_refs = self.current_eval_set_data.get("evaluatorRefs", []) + + eval_ids_list: list[str] = [] + if self.eval_ids: + eval_ids_list = [ + id.strip() for id in self.eval_ids.split(",") if id.strip() + ] + + return EvalRun( + eval_set_path=eval_set_path, + entrypoint=entrypoint, + name=f"Eval: {Path(eval_set_path).stem}", + status="running", + evaluator_refs=evaluator_refs, + workers=self.workers_count, + no_report=self.no_report, + enable_mocker_cache=self.enable_mocker_cache, + report_coverage=self.report_coverage, + output_file=self.output_file or None, + eval_ids=eval_ids_list, + eval_set_run_id=self.eval_set_run_id or None, + ) diff --git a/src/uipath/dev/ui/panels/sidebar/evaluators_tab.py b/src/uipath/dev/ui/panels/sidebar/evaluators_tab.py new file mode 100644 index 0000000..80b02bb --- /dev/null +++ b/src/uipath/dev/ui/panels/sidebar/evaluators_tab.py @@ -0,0 +1,96 @@ +"""Evaluators tab component.""" + +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Horizontal, Vertical +from textual.widgets import Button, ListItem, ListView, Static + +from uipath.dev.services.evaluator_service import EvaluatorService + + +class EvaluatorsTab(Horizontal): + """Tab component for listing existing evaluators and creating new ones.""" + + def __init__( + self, + evaluator_service: EvaluatorService | None = None, + on_evaluator_selected: 
Callable[[dict[str, Any]], None] | None = None, + on_new_evaluator_clicked: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the evaluators tab. + + Args: + evaluator_service: Service for evaluator CRUD operations. + on_evaluator_selected: Callback when an evaluator is selected. + on_new_evaluator_clicked: Callback when add button is clicked. + """ + super().__init__(**kwargs) + self.evaluator_service = evaluator_service or EvaluatorService() + self.on_evaluator_selected = on_evaluator_selected + self.on_new_evaluator_clicked = on_new_evaluator_clicked + + def compose(self) -> ComposeResult: + """Compose the evaluators tab UI.""" + with Vertical(classes="eval-sets-main"): + with Horizontal(classes="list-actions-row"): + yield Button( + "+ Add", id="add-evaluator-btn", variant="default", classes="small-btn" + ) + + # List of existing evaluators + yield ListView(id="existing-evaluators-list", classes="eval-items-list") + + async def on_mount(self) -> None: + """Handle mount event - populate the list.""" + await self._populate_evaluators_list() + + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + btn_id = event.button.id or "" + if btn_id == "add-evaluator-btn": + if self.on_new_evaluator_clicked: + self.on_new_evaluator_clicked() + + async def _populate_evaluators_list(self) -> None: + """Populate the existing evaluators list.""" + try: + list_view = self.query_one("#existing-evaluators-list", ListView) + await list_view.clear() + + # Get existing evaluators from service + evaluators = self.evaluator_service.list_evaluators() + + if not evaluators: + item = ListItem( + Static("[dim]No evaluators yet. Click + Add to create one.[/dim]"), + classes="eval-list-item", + ) + await list_view.append(item) + return + + for ev in evaluators: + ev_id = ev.get("id", "") + + item = ListItem( + Static(ev_id), + classes="eval-list-item", + ) + item.evaluator_id = ev_id # type: ignore + item.evaluator_data = ev # type: ignore + await list_view.append(item) + + except Exception: + pass + + async def on_list_view_selected(self, event: ListView.Selected) -> None: + """Handle list item selection.""" + if event.list_view.id == "existing-evaluators-list" and event.item: + ev_data = getattr(event.item, "evaluator_data", None) + if ev_data and self.on_evaluator_selected: + self.on_evaluator_selected(ev_data) + + async def refresh_list(self) -> None: + """Refresh the existing evaluators list.""" + await self._populate_evaluators_list() diff --git a/src/uipath/dev/ui/panels/run_history_panel.py b/src/uipath/dev/ui/panels/sidebar/run_history_tab.py similarity index 65% rename from src/uipath/dev/ui/panels/run_history_panel.py rename to src/uipath/dev/ui/panels/sidebar/run_history_tab.py index cdeaac3..99da06a 100644 --- a/src/uipath/dev/ui/panels/run_history_panel.py +++ b/src/uipath/dev/ui/panels/sidebar/run_history_tab.py @@ -1,46 +1,65 @@ """Panel for displaying execution run history.""" +from typing import Callable + from rich.text import Text from textual.app import ComposeResult -from textual.containers import Container, Vertical -from textual.widgets import ( - Button, - ListItem, - ListView, - Static, - TabbedContent, - TabPane, -) +from textual.containers import Vertical +from textual.widgets import Button, ListItem, ListView, Static from uipath.dev.models.execution import ExecutionRun -class RunHistoryPanel(Container): +class RunHistoryTab(Vertical): """Left panel showing execution run history.""" - def __init__(self, **kwargs): - 
"""Initialize RunHistoryPanel with empty run list.""" + def __init__( + self, + on_run_selected: Callable[[ExecutionRun], None] | None = None, + on_new_run_clicked: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the run history tab. + + Args: + on_run_selected: Callback when a run is selected. + on_new_run_clicked: Callback when the "+ New" button is clicked. + """ super().__init__(**kwargs) self.runs: list[ExecutionRun] = [] self.selected_run: ExecutionRun | None = None + self.on_run_selected = on_run_selected + self.on_new_run_clicked = on_new_run_clicked def compose(self) -> ComposeResult: """Compose the RunHistoryPanel layout.""" - with TabbedContent(): - with TabPane("History", id="history-tab"): - with Vertical(): - yield ListView(id="run-list", classes="run-list") - yield Button( - "+ New", - id="new-run-btn", - variant="primary", - classes="new-run-btn", - ) + yield ListView(id="run-list", classes="run-list") + yield Button( + "+ New", + id="new-run-btn", + variant="primary", + classes="new-run-btn", + ) def on_mount(self) -> None: """Set up periodic refresh for running items.""" self.set_interval(5.0, self._refresh_running_items) + async def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + if event.button.id == "new-run-btn": + if self.on_new_run_clicked: + self.on_new_run_clicked() + + async def on_list_view_selected(self, event: ListView.Selected) -> None: + """Handle list selection.""" + if event.list_view.id == "run-list" and event.item: + run_id = getattr(event.item, "run_id", None) + if run_id: + run = self.get_run_by_id(run_id) + if run and self.on_run_selected: + self.on_run_selected(run) + def add_run(self, run: ExecutionRun) -> None: """Add a new run to history (at the top).""" self.runs.insert(0, run) @@ -56,7 +75,7 @@ def update_run(self, run: ExecutionRun) -> None: # If run not found, just ignore; creation is done via add_run() def get_run_by_id(self, run_id: str) -> ExecutionRun | None: - """Get a run.""" + """Get a run by ID.""" for run in self.runs: if run.id == run_id: return run @@ -68,11 +87,7 @@ def clear_runs(self) -> None: self._rebuild_list() def _format_run_label(self, run: ExecutionRun) -> Text: - """Format the label for a run item. 
- - - Preserves styling from `ExecutionRun.display_name` (rich.Text) - - Ensures exactly one leading space before the content - """ + """Format the label for a run item.""" base = run.display_name # Ensure we have a Text object @@ -91,12 +106,16 @@ def _format_run_label(self, run: ExecutionRun) -> Text: return text def _rebuild_list(self) -> None: - run_list = self.query_one("#run-list", ListView) - run_list.clear() + """Rebuild the entire list.""" + try: + run_list = self.query_one("#run-list", ListView) + run_list.clear() - for run in self.runs: - item = self._create_list_item(run) - run_list.append(item) + for run in self.runs: + item = self._create_list_item(run) + run_list.append(item) + except Exception: + pass def _create_list_item(self, run: ExecutionRun) -> ListItem: item = ListItem( diff --git a/src/uipath/dev/ui/panels/sidebar/sidebar_panel.py b/src/uipath/dev/ui/panels/sidebar/sidebar_panel.py new file mode 100644 index 0000000..2482e03 --- /dev/null +++ b/src/uipath/dev/ui/panels/sidebar/sidebar_panel.py @@ -0,0 +1,188 @@ +"""Main sidebar panel - composes individual tab components.""" + +from typing import Any, Callable + +from textual.app import ComposeResult +from textual.containers import Vertical +from textual.widgets import TabbedContent, TabPane + +from uipath.dev.models.execution import ExecutionRun +from uipath.dev.services.eval_run_service import EvalRunService +from uipath.dev.services.eval_set_service import EvalSetService +from uipath.dev.services.evaluator_service import EvaluatorService +from uipath.dev.ui.panels.sidebar.eval_sets_tab import EvalSetsTab +from uipath.dev.ui.panels.sidebar.evaluators_tab import EvaluatorsTab +from uipath.dev.ui.panels.sidebar.run_history_tab import RunHistoryTab + + +class SidebarPanel(Vertical): + """Sidebar panel that composes individual tab components.""" + + def __init__( + self, + evaluator_service: EvaluatorService | None = None, + eval_set_service: EvalSetService | None = None, + eval_run_service: EvalRunService | None = None, + # Run history callbacks + on_run_selected: Callable[[ExecutionRun], None] | None = None, + on_new_run_clicked: Callable[[], None] | None = None, + # Eval sets callbacks (action-specific) + on_evaluation_selected: Callable[[dict[str, Any]], None] | None = None, + on_add_evaluation_clicked: Callable[[], None] | None = None, + on_assign_evaluator_clicked: Callable[[], None] | None = None, + on_create_eval_set_clicked: Callable[[], None] | None = None, + on_eval_set_changed: Callable[[], None] | None = None, + # Evaluators callbacks (action-specific) + on_evaluator_selected: Callable[[dict[str, Any]], None] | None = None, + on_new_evaluator_clicked: Callable[[], None] | None = None, + **kwargs, + ): + """Initialize the sidebar panel. + + Args: + evaluator_service: Service for evaluator CRUD operations. + eval_set_service: Service for eval set CRUD operations. + eval_run_service: Service for eval run execution. + on_run_selected: Callback when a run is selected in history. + on_new_run_clicked: Callback when "+ New" is clicked. + on_evaluation_selected: Callback when an evaluation is selected (full data). + on_add_evaluation_clicked: Callback when add evaluation is clicked. + on_assign_evaluator_clicked: Callback when assign evaluator is clicked. + on_create_eval_set_clicked: Callback when create eval set is clicked. + on_eval_set_changed: Callback when eval set selection changes. + on_evaluator_selected: Callback when an evaluator is selected (full data). 
+ on_new_evaluator_clicked: Callback when add evaluator is clicked. + """ + super().__init__(**kwargs) + + # Services + self.evaluator_service = evaluator_service or EvaluatorService() + self.eval_set_service = eval_set_service or EvalSetService() + self.eval_run_service = eval_run_service or EvalRunService() + + # Run history callbacks + self.on_run_selected = on_run_selected + self.on_new_run_clicked = on_new_run_clicked + + # Eval sets callbacks (action-specific) + self.on_evaluation_selected = on_evaluation_selected + self.on_add_evaluation_clicked = on_add_evaluation_clicked + self.on_assign_evaluator_clicked = on_assign_evaluator_clicked + self.on_create_eval_set_clicked = on_create_eval_set_clicked + self.on_eval_set_changed = on_eval_set_changed + + # Evaluators callbacks (action-specific) + self.on_evaluator_selected = on_evaluator_selected + self.on_new_evaluator_clicked = on_new_evaluator_clicked + + def compose(self) -> ComposeResult: + """Compose the sidebar panel with three tabs.""" + with TabbedContent(id="history-tabs"): + with TabPane("Run History", id="run-history-tab"): + yield RunHistoryTab( + on_run_selected=self.on_run_selected, + on_new_run_clicked=self.on_new_run_clicked, + id="run-history-tab-content", + ) + + with TabPane("Eval Sets", id="eval-sets-tab"): + yield EvalSetsTab( + evaluator_service=self.evaluator_service, + eval_set_service=self.eval_set_service, + eval_run_service=self.eval_run_service, + # Action-specific callbacks + on_evaluation_selected=self.on_evaluation_selected, + on_add_evaluation_clicked=self.on_add_evaluation_clicked, + on_assign_evaluator_clicked=self.on_assign_evaluator_clicked, + on_create_eval_set_clicked=self.on_create_eval_set_clicked, + on_eval_set_changed=self.on_eval_set_changed, + id="eval-sets-panel", + ) + + with TabPane("Evaluators", id="evaluators-tab"): + yield EvaluatorsTab( + evaluator_service=self.evaluator_service, + # Action-specific callbacks + on_evaluator_selected=self.on_evaluator_selected, + on_new_evaluator_clicked=self.on_new_evaluator_clicked, + id="evaluators-panel", + ) + + # ========================================================================= + # Run History Tab Delegation + # ========================================================================= + + def get_run_history_tab(self) -> RunHistoryTab | None: + """Get the run history tab component.""" + try: + return self.query_one("#run-history-tab-content", RunHistoryTab) + except Exception: + return None + + def add_run(self, run: ExecutionRun) -> None: + """Add a new run to history.""" + tab = self.get_run_history_tab() + if tab: + tab.add_run(run) + + def update_run(self, run: ExecutionRun) -> None: + """Update an existing run.""" + tab = self.get_run_history_tab() + if tab: + tab.update_run(run) + + def get_run_by_id(self, run_id: str) -> ExecutionRun | None: + """Get a run by ID.""" + tab = self.get_run_history_tab() + if tab: + return tab.get_run_by_id(run_id) + return None + + def clear_runs(self) -> None: + """Clear all runs from history.""" + tab = self.get_run_history_tab() + if tab: + tab.clear_runs() + + # ========================================================================= + # Eval Sets Tab Delegation + # ========================================================================= + + def get_eval_sets_tab(self) -> EvalSetsTab | None: + """Get the eval sets tab component.""" + try: + return self.query_one("#eval-sets-panel", EvalSetsTab) + except Exception: + return None + + # 
========================================================================= + # Evaluators Tab Delegation + # ========================================================================= + + def get_evaluators_tab(self) -> EvaluatorsTab | None: + """Get the evaluators tab component.""" + try: + return self.query_one("#evaluators-panel", EvaluatorsTab) + except Exception: + return None + + # ========================================================================= + # Tab Switching + # ========================================================================= + + def switch_to_run_history(self) -> None: + """Switch to the run history tab.""" + try: + tabbed_content = self.query_one("#history-tabs", TabbedContent) + tabbed_content.active = "run-history-tab" + except Exception: + pass + + def switch_to_eval_sets(self) -> None: + """Switch to the eval sets tab.""" + try: + tabbed_content = self.query_one("#history-tabs", TabbedContent) + tabbed_content.active = "eval-sets-tab" + except Exception: + pass + diff --git a/src/uipath/dev/ui/styles/terminal.tcss b/src/uipath/dev/ui/styles/terminal.tcss index 627883d..efc1bd4 100644 --- a/src/uipath/dev/ui/styles/terminal.tcss +++ b/src/uipath/dev/ui/styles/terminal.tcss @@ -1,13 +1,137 @@ +/* === Base Layout === */ Screen { layout: horizontal; } -.run-history { +.hidden { + display: none; +} + +/* === Main Layout Panels === */ +.left-panel { width: 30%; min-width: 25; padding-right: 1; } +.main-content { + width: 70%; + padding-left: 1; + height: 100%; +} + +.middle-panel { + width: 1fr; + height: 100%; + overflow: hidden; +} + +.right-edit-panel { + width: 1fr; + height: 100%; + border-left: tall $primary 30%; +} + +/* === Common Widget Overrides === */ +Label { + margin: 1 1; + width: 100%; + height: 100%; + border: tall $primary; + content-align: center middle; +} + +/* Override Label styles for all eval/detail panels */ +EvalRunDetailsPanel Label, +EvalSetsTab Label, +EvaluatorsTab Label, +SidebarPanel Label { + border: none; + height: auto; + width: auto; + margin: 0; + content-align: left middle; +} + +TabbedContent { + height: 100%; +} + +TabPane { + height: 100%; + padding: 0; +} + +ContentSwitcher { + height: 1fr; +} + +Footer { + margin-top: 1; + height: auto; + dock: bottom; +} + +Checkbox { + margin-top: 1; +} + +TextArea.invalid { + border: tall red; +} + +/* === All TabbedContent containers === */ +#new-run-tabs, +#eval-tabs, +#edit-tabs, +#create-tabs, +#right-eval-tabs, +#evaluator-create-tabs, +#eval-run-tabs, +#eval-run-details-tabs { + height: 100%; +} + +/* Hide the outer tab bar for eval-run-tabs so only inner tabs show */ +#eval-run-tabs > ContentSwitcher { + height: 100%; +} + +#eval-run-tabs > Tabs { + display: none; +} + +#eval-run-details-panel { + height: 100%; +} + +/* All TabPane containers */ +#new-run-tabs TabPane, +#eval-tabs TabPane, +#edit-tabs TabPane, +#create-tabs TabPane, +#right-eval-tabs TabPane, +#evaluator-create-tabs TabPane, +#eval-run-tabs TabPane, +#eval-run-details-tabs TabPane { + height: 100%; + padding: 0 1; +} + +/* Individual tab IDs */ +#new-run-tab, +#evaluations-tab, +#evaluators-tab, +#edit-tab, +#create-tab, +#eval-run-tab, +#eval-details-tab, +#eval-traces-tab, +#eval-logs-tab { + height: 100%; +} + +/* === Run History List === */ .run-list { height: 1fr; margin-bottom: 1; @@ -42,112 +166,134 @@ Screen { color: #ff4444; } +/* === Buttons === */ .new-run-btn { width: 100%; margin-bottom: 1; border: none; text-style: bold; } -.main-content { - width: 70%; - padding-left: 1; -} -.new-run-title { 
+.action-btn { + min-width: 10; + padding: 0 2; text-style: bold; + border: none; + margin-right: 2; +} + +.small-btn { + min-width: 5; + margin-right: 1; padding: 0 1; - height: 1; - margin-bottom: 0; + border: none; } -.new-run-panel { - height: 100%; +.tiny-btn { + min-width: 4; + margin-right: 1; + padding: 0; + border: none; } -.field-label { - text-style: bold; +.close-btn { + min-width: 3; + dock: right; + padding: 0; + border: none; } -.run-actions { - dock: bottom; +/* === Text Styles === */ +.panel-title { + text-style: bold; + padding: 0 0; + margin-bottom: 1; height: auto; - align: left middle; + width: 100%; } -.action-btn { - margin-right: 2; - min-width: 8; - border: none; +.section-header { text-style: bold; + margin-top: 1; + margin-bottom: 0; + color: $primary; + height: auto; } -.details-content { - height: 1fr; +.helper-text { + margin-bottom: 1; + height: auto; } -.traces-section, -.logs-section { - width: 50%; - height: 100%; +/* === Form Elements === */ +.json-input { + margin-top: 1; + height: auto; } -.traces-section { - width: 50%; +/* === Detail Panel Common Styles === */ +.detail-row { + height: auto; + margin: 0; } -.logs-section { - width: 50%; +.detail-row-small { + height: auto; + margin: 0; + padding-left: 1; } -.detail-log { - height: 1fr; - padding: 1; - padding-top: 0; +.detail-json { + height: 8; margin-bottom: 1; } -.span-detail-log { - height: 1fr; - padding: 1; - padding-top: 0; + +.detail-json-large { + height: auto; + min-height: 5; + max-height: 30; + margin-bottom: 1; } -.status-running { - background: #ffaa00; - color: #000000; - border: solid #ffaa00; +.detail-input { + height: auto; + margin-bottom: 1; } -.status-success { - background: #00ff88; - color: #000000; - border: solid #00ff88; +/* === Run Actions === */ +.run-actions { + dock: bottom; + height: auto; + padding: 1; + align: left middle; } -.status-error { - background: #ff4444; - color: #ffffff; - border: solid #ff4444; +.run-info-section { + height: auto; + margin-bottom: 1; } -.hidden { - display: none; +/* === Lists === */ +.eval-items-list { + height: 1fr; + min-height: 5; } -Footer { - margin-top:1; +.eval-list-item { height: auto; - dock: bottom; } -TabbedContent { - height: 100%; +.list-actions-row { + height: 2; + width: 100%; } -TabPane { +/* === New Run Panel === */ +.new-run-panel-content { height: 100%; - padding: 0; } +/* === Traces & Logs === */ .traces-content { height: 100%; margin-bottom: 1; @@ -171,72 +317,36 @@ TabPane { padding-top: 0; } -Label { - margin: 1 1; - width: 100%; - height: 100%; - border: tall $primary; - content-align: center middle; -} - -ContentSwitcher { +.detail-log, +.span-detail-log { height: 1fr; + padding: 1; + padding-top: 0; } -SpanDetailsDisplay { - height: 100%; -} - -#span-details-display { - height: 100%; +.detail-log { + margin-bottom: 1; } +SpanDetailsDisplay, +#span-details-display, #span-details { height: 100%; } -.new-run-panel { - height: 100%; -} - -.new-run-title { - text-style: bold; - padding: 0 1; - height: 2; - content-align: left middle; -} - -.field-label { - text-style: bold; - margin: 1 0; -} - -.script-input { - height: 3; -} - -.json-input { - margin-top: 1; - height: auto; -} - -.run-actions { +/* === Debug Controls === */ +#debug-controls { height: auto; - padding: 1; -} - -.action-btn { - min-width: 10; - padding: 0 2; - text-style: bold; - border: none; + dock: bottom; } -TextArea.invalid { - border: tall red; +.debug-actions-row { + height: 2; + width: 100%; + align: left middle; } - +/* === Chat === */ Prompt { 
border: wide $primary-background; background: $surface; @@ -245,6 +355,7 @@ Prompt { margin-left: 1; padding: 1 1 0 1; } + Response, Tool { border: wide $primary-background; background: $surface; @@ -254,53 +365,142 @@ Response, Tool { padding: 1 1 0 1; } -#chat-container{ +#chat-container { background: $surface; + margin-bottom: 1; } -#chat-input{ +#chat-input { dock: bottom; margin: 1; } -Checkbox{ - margin-top: 1; +#eval-details-placeholder, +#eval-set-detail-placeholder { + text-align: center; + padding: 2; + color: $text-muted; } -#debug-controls { - height: auto; - dock: bottom; +#eval-details-content { + height: 100%; } -.debug-actions-row { - height: 2; +.eval-error { + margin: 0; +} + +/* === Eval Sets Panel === */ +.eval-sets-main { width: 100%; - align: left middle; + height: 100%; + padding: 0 1; } -Prompt { - border: wide $primary-background; - background: $surface; - color: $text; - margin-right: 8; - margin-left: 1; - padding: 1 1 0 1; +.eval-sets-form { + height: 1fr; + padding: 0; } -Response, Tool { - border: wide $primary-background; - background: $surface; - color: $text; - margin: 1; - margin-left: 8; - padding: 1 1 0 1; + +#eval-sets-tab { + padding: 0; } -#chat-container{ - background: $surface; +.opt-input-small { + width: 8; + height: 3; + margin-bottom: 0; +} + +.field-label-inline { + height: 1; + margin: 0; +} + +.opt-checkbox { + height: 3; + margin: 0; +} + +/* Evaluator criteria section */ +.ev-row { + height: auto; + width: 100%; + align: left top; margin-bottom: 1; } -#chat-input{ - dock: bottom; - margin: 1; +.ev-cb { + margin: 0; + width: auto; + height: auto; +} + +.ev-collapse { + width: 1fr; + height: auto; +} + +.ev-desc { + height: auto; + margin-bottom: 1; +} + +.ev-criteria-header { + height: auto; + margin-bottom: 1; + color: $primary; +} + +.ev-field-lbl { + height: auto; + margin: 0; + margin-top: 1; +} + +.ev-field-input { + height: auto; + margin-bottom: 0; + margin-right: 2; +} + +.button-row { + height: auto; + margin-top: 1; +} + +.button-row Button { + margin-right: 1; +} + +/* === Edit Panel Content === */ +#edit-panel-content { + height: 1fr; + padding: 0 1; +} + +#edit-panel-content .close-btn { + dock: right; + min-width: 3; + padding: 0; + border: none; +} + +#edit-panel-content Collapsible { + height: auto; + margin: 0 0 1 0; + padding: 0; + border: none; +} + +#edit-panel-content CollapsibleTitle { + padding: 1 1; + margin: 0; + height: auto; + width: 100%; +} + +#edit-panel-content Collapsible > Contents { + height: auto; + padding: 1 1 1 1; }
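
For orientation, here is a minimal usage sketch of the SidebarPanel API introduced in this diff. The imports, constructor parameters, the `on_run_selected` callback signature, `ExecutionRun.display_name`, and `switch_to_eval_sets()` all follow the code added above; the host `DemoApp`, its "e" key binding, and the `"sidebar"` id are illustrative assumptions, not part of the change.

# Illustrative sketch only -- DemoApp, its binding, and the "sidebar" id are
# assumptions; SidebarPanel, the services, and ExecutionRun come from the diff above.
from textual.app import App, ComposeResult
from textual.binding import Binding

from uipath.dev.models.execution import ExecutionRun
from uipath.dev.services.eval_set_service import EvalSetService
from uipath.dev.services.evaluator_service import EvaluatorService
from uipath.dev.ui.panels.sidebar.sidebar_panel import SidebarPanel


class DemoApp(App[None]):
    """Hypothetical host app that mounts the sidebar and reacts to its callbacks."""

    BINDINGS = [Binding("e", "show_evals", "Eval Sets")]

    def compose(self) -> ComposeResult:
        # All services and callbacks are optional; SidebarPanel falls back to
        # fresh service instances when none are provided.
        yield SidebarPanel(
            evaluator_service=EvaluatorService(),
            eval_set_service=EvalSetService(),
            on_run_selected=self._on_run_selected,
            id="sidebar",
        )

    def _on_run_selected(self, run: ExecutionRun) -> None:
        # Invoked by the Run History tab when a run is picked in the list.
        self.log(f"run selected: {run.display_name}")

    def action_show_evals(self) -> None:
        # Programmatic tab switch exposed by SidebarPanel.
        self.query_one("#sidebar", SidebarPanel).switch_to_eval_sets()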