diff --git a/.gitattributes b/.gitattributes index 245dd5cd..129e5874 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,6 +7,7 @@ libs/openagent/sandbox/vm/setup_lite/steps/*.sh text eol=lf libs/hexagent/sandbox/vm/setup_lite/*.sh text eol=lf libs/hexagent/sandbox/vm/setup_lite/steps/*.sh text eol=lf libs/openagent/sandbox/vm/wsl/prebuilt/openagent-prebuilt.tar filter=lfs diff=lfs merge=lfs -text +libs/hexagent_demo/electron/prebuilt/hexagent-prebuilt.tar filter=lfs diff=lfs merge=lfs -text libs/hexagent_demo/electron/resources/wsl/*.msi filter=lfs diff=lfs merge=lfs -text libs/hexagent_demo/electron/*.msi filter=lfs diff=lfs merge=lfs -text libs/hexagent_demo/electron/resources/wsl/ubuntu-base-24.04-amd64.tar.gz filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 6c9f2a64..2c8c93d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -# Byte-compiled / optimized / DLL files +# Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] *$py.class @@ -227,8 +227,11 @@ libs/hexagent_demo/electron/dist.zip # Vite dev-server cache **/.vite/ -# WSL prebuilt VM image (build artifact — generate with prepare-wsl-prebuilt.ps1) +# WSL prebuilt VM image (build artifact, generated by prepare-wsl-prebuilt.ps1) **/wsl/prebuilt/*.tar +libs/hexagent_demo/electron/prebuilt/*.tar +!libs/hexagent_demo/electron/prebuilt/hexagent-prebuilt.tar # One-off investigation/diagnostic reports reports/ + diff --git a/libs/hexagent/hexagent/computer/local/_wsl.py b/libs/hexagent/hexagent/computer/local/_wsl.py index 1a3cdb1a..80a0a019 100644 --- a/libs/hexagent/hexagent/computer/local/_wsl.py +++ b/libs/hexagent/hexagent/computer/local/_wsl.py @@ -27,6 +27,7 @@ import sys import time from pathlib import Path +from typing import Any from hexagent.computer.base import ExecutionMetadata from hexagent.computer.local._types import ResolvedMount @@ -35,6 +36,39 @@ logger = logging.getLogger(__name__) + +# --- WSL Logging --- +def _get_wsl_log_file() -> Path: + """Return the path to wsl.log.""" + data_dir = os.environ.get("HEXAGENT_DATA_DIR") + base = Path(data_dir) if data_dir else Path.home() / ".hexagent" + log_dir = base / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + return log_dir / "wsl.log" + + +_wsl_logger = logging.getLogger("hexagent.wsl") +_wsl_logger.setLevel(logging.DEBUG) +if not any(isinstance(h, logging.FileHandler) and h.baseFilename == str(_get_wsl_log_file().resolve()) for h in _wsl_logger.handlers): + log_file = _get_wsl_log_file() + _fh = logging.FileHandler(log_file, encoding="utf-8") + _fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")) + _wsl_logger.addHandler(_fh) + # Ensure logs are visible in the main logger too + _wsl_logger.propagate = True + _wsl_logger.info("WSL LOG FILE: %s", log_file.resolve()) + + +def wsl_log(msg: str, *args: Any, level: int = logging.INFO) -> None: + """Log a message to the dedicated wsl.log and flush it.""" + _wsl_logger.log(level, msg, *args) + for h in _wsl_logger.handlers: + if isinstance(h, logging.FileHandler): + h.flush() + + +# ------------------- + # UNC path prefixes for accessing WSL filesystem from Windows. # Modern Windows 11 uses ``wsl.localhost``; older builds use ``wsl$``. _UNC_PREFIXES = (r"\\wsl.localhost", r"\\wsl$") @@ -375,27 +409,30 @@ async def start(self) -> None: async def apply_mounts(self, mounts: list[ResolvedMount]) -> None: """Apply mount configuration to the distribution. 
- Writes the config, terminates the distro (clearing old bind - mounts), restarts, and applies new bind mounts. + Writes the config. If the distro is running, it applies the mounts + live via ``mount --bind`` to avoid a full restart. If stopped, + they will be applied on the next ``start()``. Args: mounts: Complete list of resolved mounts. Replaces all existing mounts in ``mounts.json``. Raises: - WslError: If the distribution does not exist or start fails. + WslError: If the distribution does not exist or live apply fails. """ current = await self.status() if current is None: msg = f"WSL distro '{self._instance}' does not exist" raise WslError(msg) + wsl_log("WslVM.apply_mounts: Updating mounts.json with %d mounts", len(mounts)) self.write_mounts(mounts) if current == "Running": - await self.stop() - - await self.start() + wsl_log("WslVM.apply_mounts: Distro is running, applying mounts live to avoid restart") + await self._apply_bind_mounts() + else: + wsl_log("WslVM.apply_mounts: Distro is not running, mounts will be applied on next start") async def stop(self) -> None: """Terminate the WSL distribution. @@ -467,6 +504,7 @@ async def shell( exec_args += ["-c", inner] start_time = time.monotonic() + wsl_log("WSL Shell Execution (Instance: %s, User: %s, CWD: %s): %s", self._instance, user or "default", cwd or "default", command) process = await asyncio.create_subprocess_exec( *exec_args, @@ -485,17 +523,22 @@ async def shell( process.kill() await process.wait() msg = f"timed out after {timeout}s" + wsl_log("WSL Shell TIMEOUT (Instance: %s): %s", self._instance, msg, level=logging.ERROR) raise WslError(msg) from None stdout = _decode_wsl_output(stdout_bytes).removesuffix("\n") stderr = _decode_wsl_output(stderr_bytes).removesuffix("\n") rc: int = process.returncode if process.returncode is not None else -1 + duration_ms = int((time.monotonic() - start_time) * 1000) + + wsl_log("WSL Shell Result (Exit: %d, Duration: %dms):\nSTDOUT: %s\nSTDERR: %s", rc, duration_ms, stdout or "(empty)", stderr or "(empty)") + return CLIResult( stdout=stdout, stderr=stderr, exit_code=rc, - metadata=ExecutionMetadata(duration_ms=int((time.monotonic() - start_time) * 1000)), + metadata=ExecutionMetadata(duration_ms=duration_ms), ) # ------------------------------------------------------------------ @@ -546,6 +589,7 @@ async def _run_wsl(self, *cmd: str, timeout: float = 300) -> str: # noqa: ASYNC Raises: WslError: On non-zero exit code or timeout. 
""" + wsl_log("WSL.exe Command (Instance: %s): %s", self._instance, " ".join(cmd)) proc = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, @@ -561,16 +605,29 @@ async def _run_wsl(self, *cmd: str, timeout: float = 300) -> str: # noqa: ASYNC proc.kill() await proc.wait() msg = f"wsl.exe timed out after {timeout}s: {' '.join(cmd[:3])}" + wsl_log("WSL.exe TIMEOUT (Instance: %s): %s", self._instance, msg, level=logging.ERROR) raise WslError(msg) from None + stdout = _decode_wsl_output(stdout_bytes) + stderr = _decode_wsl_output(stderr_bytes) + if proc.returncode != 0: - stderr = _decode_wsl_output(stderr_bytes).strip() - msg = f"wsl.exe failed (exit {proc.returncode}): {stderr}" + stderr_strip = stderr.strip() + msg = f"wsl.exe failed (exit {proc.returncode}): {stderr_strip}" + wsl_log( + "WSL.exe ERROR (Instance: %s, Exit: %d):\nSTDOUT: %s\nSTDERR: %s", + self._instance, + proc.returncode, + stdout.strip() or "(empty)", + stderr_strip or "(empty)", + level=logging.ERROR, + ) raise WslError(msg) - return _decode_wsl_output(stdout_bytes) + wsl_log("WSL.exe Success (Instance: %s):\nSTDOUT: %s", self._instance, stdout.strip() or "(empty)") + return stdout - async def _apply_bind_mounts(self) -> None: + async def _apply_bind_mounts(self) -> None: # noqa: PLR0912, PLR0915 """Apply all bind mounts from ``mounts.json`` inside the distro. Idempotent: skips mounts that are already active (detected via @@ -578,34 +635,78 @@ async def _apply_bind_mounts(self) -> None: """ mounts = self.read_mounts() if not mounts: + wsl_log("WSL applying bind mounts: No mounts found in mounts.json") return - logger.debug("WSL applying %d bind mount(s) from mounts.json", len(mounts)) + wsl_log("WSL applying %d bind mount(s) from mounts.json", len(mounts)) + + # Diagnostic: Log all current mounts + all_mounts = await self.shell("mount", user="root") + wsl_log("WSL Current System Mounts:\n%s", all_mounts.stdout or "(empty)") for m in mounts: - wsl_host = _win_path_to_wsl(m.host_path) + # Use wslpath to get the accurate Linux path for the Windows host path. + # This handles drive letters, mount points, and case-sensitivity correctly. + wsl_host_res = await self.shell(f"wslpath -u {shlex.quote(m.host_path)}", user="root") + if wsl_host_res.exit_code == 0 and wsl_host_res.stdout.strip(): + wsl_host = wsl_host_res.stdout.strip() + else: + wsl_host = _win_path_to_wsl(m.host_path) + wsl_log("WSL wslpath failed for %s, falling back to %s", m.host_path, wsl_host, level=logging.WARNING) + qguest = shlex.quote(m.guest_path) qhost = shlex.quote(wsl_host) # Skip if already mounted. 
check = await self.shell(f"mountpoint -q {qguest}", user="root") if check.exit_code == 0: - logger.info( - "WSL bind-mount already active: guest=%s wsl_source=%s host_win=%s", - m.guest_path, - wsl_host, - m.host_path, - ) - continue + # Even if mounted, check if it's empty + ls_check = await self.shell(f"ls -A {qguest}", user="root") + if ls_check.stdout.strip(): + wsl_log( + "WSL bind-mount already active and NOT empty: guest=%s host_win=%s", + m.guest_path, + m.host_path, + ) + continue + + wsl_log("WSL bind-mount active but EMPTY, forcing re-mount: %s", m.guest_path, level=logging.WARNING) + await self.shell(f"umount -l {qguest}", user="root") + + # Diagnostic: check source path existence and content + src_ls = await self.shell(f"ls -ld {qhost} && ls -A {qhost} | head -n 5", user="root") + wsl_log("WSL Source Path Check (%s):\n%s", wsl_host, src_ls.stdout or "(empty)") + + wsl_log("WSL applying NEW sync/copy: %s -> %s", m.host_path, m.guest_path) - cmd = f"mkdir -p {qguest} && mount --bind {qhost} {qguest}" - if not m.writable: - cmd += f" && mount -o remount,ro,bind {qguest}" + # Ensure target parent exists + guest_parent = str(Path(m.guest_path).parent).replace("\\", "/") + setup_cmd = f"mkdir -p {shlex.quote(guest_parent)} && chmod 777 {shlex.quote(guest_parent)}" + await self.shell(setup_cmd, user="root") - result = await self.shell(cmd, user="root") - if result.exit_code != 0: - msg = f"Failed to bind-mount {m.host_path} → {m.guest_path}: {result.stderr}" - raise WslError(msg) + # Check if this is a skills directory (usually in /mnt/skills/) + is_skill = "/mnt/skills/" in m.guest_path + + if is_skill: + wsl_log("WSL: Skills directory detected, using CP instead of BIND for reliability") + # 1. Clean up potential old mount points + await self.shell(f"umount -l {qguest} 2>/dev/null || true", user="root") + # 2. Sync files from Windows to WSL internal storage + sync_cmd = f"mkdir -p {qguest} && cp -r {qhost}/* {qguest}/ 2>/dev/null || true && chmod -R 777 {qguest}" + result = await self.shell(sync_cmd, user="root") + wsl_log("WSL Skills sync finished (Exit: %d)", result.exit_code) + else: + # For workspace/working dirs, we still try bind mount as they need real-time sync. + wsl_log("WSL: Workspace directory detected, using robust bind mount") + cmd = f"mkdir -p {qguest} && chmod 777 {qguest} && mount --bind {qhost} {qguest}" + if not m.writable: + cmd += f" && mount -o remount,ro,bind {qguest}" + + result = await self.shell(cmd, user="root") + if result.exit_code != 0: + wsl_log("WSL mount failed, falling back to symlink: %s", result.stderr, level=logging.WARNING) + ln_cmd = f"rm -rf {qguest} && ln -s {qhost} {qguest}" + result = await self.shell(ln_cmd, user="root") # Cowork mounts bind a DrvFs path (/mnt//...) into /sessions//mnt/.... # DrvFs often presents files as root:root with mode 755 to Linux UIDs that are not @@ -620,26 +721,31 @@ async def _apply_bind_mounts(self) -> None: ) if m.writable and sess is not None and not skip_chown: quser = shlex.quote(sess) + wsl_log("WSL post-bind chown for session user: %s -> %s", sess, m.guest_path) own = await self.shell(f"chown -R {quser}:{quser} {qguest}", user="root") if own.exit_code != 0: - logger.warning( + wsl_log( "WSL post-bind chown failed (session=%s guest=%s): %s", sess, m.guest_path, (own.stderr or own.stdout or "").strip() or "(empty)", + level=logging.WARNING, ) # Post-verify so logs show whether the guest path is really a mount (debug empty ls issues). 
verify = await self.shell(f"findmnt -n {qguest}", user="root") + content_verify = await self.shell(f"ls -A {qguest} | head -n 5", user="root") + if verify.exit_code == 0 and (verify.stdout or "").strip(): - logger.info( - "WSL bind-mount applied+verified: guest=%s host_win=%s findmnt=%s", + wsl_log( + "WSL bind-mount applied+verified: guest=%s host_win=%s findmnt=%s Content: %s", m.guest_path, m.host_path, verify.stdout.strip().replace("\n", " | "), + content_verify.stdout.strip().replace("\n", ", ") or "(empty)", ) else: - logger.warning( + wsl_log( "WSL bind-mount mount(8) succeeded but findmnt could not confirm guest=%s " "(host_win=%s, wsl_source=%s, findmnt_exit=%s, findmnt_stderr=%s)", m.guest_path, @@ -647,6 +753,7 @@ async def _apply_bind_mounts(self) -> None: wsl_host, verify.exit_code, (verify.stderr or "").strip() or "(empty)", + level=logging.WARNING, ) async def _resolve_unc_prefix(self) -> str: diff --git a/libs/hexagent/hexagent/computer/local/vm_win.py b/libs/hexagent/hexagent/computer/local/vm_win.py index 3516c382..4f1e38ef 100644 --- a/libs/hexagent/hexagent/computer/local/vm_win.py +++ b/libs/hexagent/hexagent/computer/local/vm_win.py @@ -27,6 +27,7 @@ from __future__ import annotations import asyncio +import logging import os import shlex import uuid @@ -272,10 +273,12 @@ def __init__(self, *, instance: str = "hexagent") -> None: # Platform check is delegated to WslVM.__init__(), which raises # UnsupportedPlatformError on non-Windows platforms. from hexagent.computer.local._wsl import WslVM as _WslVM + from hexagent.computer.local._wsl import wsl_log self._vm: WslVM = _WslVM(instance=instance) self._instance = instance self._lock = asyncio.Lock() + wsl_log("LocalVM initialized (Instance: %s)", instance) # ------------------------------------------------------------------ # Public API @@ -297,7 +300,7 @@ async def stop(self) -> None: return await self._vm.stop() - async def mount( # noqa: PLR0912 + async def mount( # noqa: PLR0912, PLR0915 self, mounts: Mount | list[Mount], *, @@ -321,10 +324,13 @@ async def mount( # noqa: PLR0912 VMMountConflictError: Guest path conflict with different config. VMError: Session does not exist on the distro. """ + from hexagent.computer.local._wsl import wsl_log + mount_list = [mounts] if isinstance(mounts, Mount) else list(mounts) if not mount_list: return + wsl_log("LocalVM.mount(session=%s, defer=%s): %s", session, defer, [m.source for m in mount_list]) self._validate_mounts(mount_list) # Validate session user exists on the distro. @@ -332,6 +338,7 @@ async def mount( # noqa: PLR0912 result = await self._vm.shell(f"id -u {shlex.quote(session)}") if result.exit_code != 0: msg = f"Session '{session}' does not exist on the distro" + wsl_log("LocalVM.mount ERROR: %s", msg, level=logging.ERROR) raise VMError(msg) scope = "session" if session is not None else "system" @@ -349,6 +356,7 @@ async def mount( # noqa: PLR0912 # Config may already contain the mount while the live bind mount # was lost (for example after external WSL restart). Verify and # self-heal by restoring only the missing live bind mount. 
+ wsl_log("LocalVM.mount: All mounts already in config, checking live status for self-heal...") for r in resolved_new: probe = await self._vm.shell(f"findmnt -n {shlex.quote(r.guest_path)}") if probe.exit_code != 0: @@ -357,12 +365,14 @@ async def mount( # noqa: PLR0912 wsl_host = _win_path_to_wsl(r.host_path) qguest = shlex.quote(r.guest_path) qhost = shlex.quote(wsl_host) + wsl_log("LocalVM.mount SELF-HEAL: Restoring lost bind mount %s -> %s", r.host_path, r.guest_path) cmd = f"mkdir -p {qguest} && mount --bind {qhost} {qguest}" if not r.writable: cmd += f" && mount -o remount,ro,bind {qguest}" remount = await self._vm.shell(cmd, user="root") if remount.exit_code != 0: msg = f"Failed to self-heal mount '{r.guest_path}': {remount.stderr or remount.stdout}" + wsl_log("LocalVM.mount SELF-HEAL FAILED: %s", msg, level=logging.ERROR) raise VMError(msg) sess = _session_user_from_guest_mount_path(r.guest_path) @@ -373,24 +383,30 @@ async def mount( # noqa: PLR0912 ) if r.writable and sess is not None and not skip_chown: quser = shlex.quote(sess) + wsl_log("LocalVM.mount SELF-HEAL chown: %s -> %s", sess, r.guest_path) await self._vm.shell(f"chown -R {quser}:{quser} {qguest}", user="root") verify = await self._vm.shell(f"findmnt -n {qguest}", user="root") if verify.exit_code != 0 or not (verify.stdout or "").strip(): msg = f"Mount self-heal verification failed for '{r.guest_path}'" + wsl_log("LocalVM.mount SELF-HEAL VERIFY FAILED: %s", msg, level=logging.ERROR) raise VMError(msg) return + wsl_log("LocalVM.mount: Adding %d new mount(s) to config", len(truly_new)) existing_guests = {r.guest_path for r in existing} for r in truly_new: if r.guest_path in existing_guests: msg = f"Mount conflict: guest path '{r.guest_path}' is already in use" + wsl_log("LocalVM.mount ERROR: %s", msg, level=logging.ERROR) raise VMMountConflictError(msg) merged = existing + truly_new if defer: + wsl_log("LocalVM.mount: DEFERRED update to mounts.json") self._vm.write_mounts(merged) else: + wsl_log("LocalVM.mount: IMMEDIATE apply (distro restart)") await self._vm.apply_mounts(merged) async def unmount( diff --git a/libs/hexagent/hexagent/prompts/fragments/tool_instruction_presenttouser.md b/libs/hexagent/hexagent/prompts/fragments/tool_instruction_presenttouser.md index aec4e035..1e7332b7 100644 --- a/libs/hexagent/hexagent/prompts/fragments/tool_instruction_presenttouser.md +++ b/libs/hexagent/hexagent/prompts/fragments/tool_instruction_presenttouser.md @@ -16,3 +16,4 @@ How it works: - Multiple files can be presented efficiently in a single call - If a file is not in the output directory, it will be automatically copied into that directory - The first input path passed in to the PresentToUser tool, and therefore the first output path returned from it, should correspond to the file that is most relevant for the user to see first +- In the same turn, do not call PresentToUser repeatedly for the same file path. 
If the file list is unchanged, reuse the existing result instead of presenting it again diff --git a/libs/hexagent/tests/unit_tests/computer/test_wsl.py b/libs/hexagent/tests/unit_tests/computer/test_wsl.py index 164e1ae1..0139ee10 100644 --- a/libs/hexagent/tests/unit_tests/computer/test_wsl.py +++ b/libs/hexagent/tests/unit_tests/computer/test_wsl.py @@ -724,14 +724,12 @@ async def test_apply_stops_running_distro(self) -> None: with ( patch.object(vm, "status", new_callable=AsyncMock, return_value="Running"), - patch.object(vm, "stop", new_callable=AsyncMock) as mock_stop, - patch.object(vm, "start", new_callable=AsyncMock) as mock_start, + patch.object(vm, "_apply_bind_mounts", new_callable=AsyncMock) as mock_apply, patch.object(vm, "write_mounts"), ): mounts = [ResolvedMount(host_path=r"C:\h", guest_path="/g", writable=False)] await vm.apply_mounts(mounts) - mock_stop.assert_awaited_once() - mock_start.assert_awaited_once() + mock_apply.assert_awaited_once() async def test_apply_raises_if_instance_missing(self) -> None: with ( @@ -771,27 +769,36 @@ async def test_applies_bind_mounts(self) -> None: patch.object(vm, "read_mounts", return_value=mounts), patch.object(vm, "shell", new_callable=AsyncMock) as mock_shell, ): - # First call: mountpoint check (not mounted) - # Second call: actual mount - mock_shell.side_effect = [ - _fail(), # mountpoint -q /mnt/code -> not mounted - _ok(), # mount --bind ... /mnt/code - _ok(stdout="/mnt/c/Users/foo/code"), # findmnt -n /mnt/code - _fail(), # mountpoint -q /mnt/data -> not mounted - _ok(), # mount --bind ... /mnt/data (+ remount ro) - _ok(stdout="/mnt/d/data"), # findmnt -n /mnt/data - ] + + async def _shell_side_effect(command: str, *_: Any, **__: Any) -> CLIResult: # noqa: PLR0911 + if command.startswith("wslpath -u"): + if "C:\\Users\\foo\\code" in command: + return _ok(stdout="/mnt/c/Users/foo/code") + if "D:\\data" in command: + return _ok(stdout="/mnt/d/data") + if command == "mountpoint -q /mnt/code": + return _fail() + if command == "mountpoint -q /mnt/data": + return _fail() + if command.startswith("findmnt -n "): + return _ok(stdout="/dev/mock") + if command.startswith(("ls -A /mnt/code | head -n 5", "ls -A /mnt/data | head -n 5")): + return _ok(stdout="ok") + return _ok() + + mock_shell.side_effect = _shell_side_effect await vm._apply_bind_mounts() - assert mock_shell.await_count == 6 + mount_calls = [c.args[0] for c in mock_shell.call_args_list if "mount --bind" in c.args[0]] + assert len(mount_calls) == 2 # Check writable mount (no remount) - mount_call_1 = mock_shell.call_args_list[1].args[0] + mount_call_1 = mount_calls[0] assert "mount --bind" in mount_call_1 assert "/mnt/c/Users/foo/code" in mount_call_1 assert "remount,ro" not in mount_call_1 # Check read-only mount (remount ro) - mount_call_2 = mock_shell.call_args_list[4].args[0] + mount_call_2 = mount_calls[1] assert "mount --bind" in mount_call_2 assert "remount,ro" in mount_call_2 @@ -810,17 +817,23 @@ async def test_chown_after_writable_session_bind(self) -> None: patch.object(vm, "read_mounts", return_value=mounts), patch.object(vm, "shell", new_callable=AsyncMock) as mock_shell, ): - mock_shell.side_effect = [ - _fail(), # mountpoint -q - _ok(), # mount --bind - _ok(), # chown -R alice:alice - _ok(stdout="/mnt/d/w"), # findmnt - ] + + async def _shell_side_effect(command: str, *_: Any, **__: Any) -> CLIResult: + if command.startswith("wslpath -u") and "D:\\w" in command: + return _ok(stdout="/mnt/d/w") + if command == "mountpoint -q /sessions/alice/mnt/work": + return 
_fail() + if command.startswith("findmnt -n "): + return _ok(stdout="/dev/mock") + if command.startswith("ls -A /sessions/alice/mnt/work | head -n 5"): + return _ok(stdout="ok") + return _ok() + + mock_shell.side_effect = _shell_side_effect await vm._apply_bind_mounts() - assert mock_shell.await_count == 4 - chown_call = mock_shell.call_args_list[2].args[0] + chown_call = next(c.args[0] for c in mock_shell.call_args_list if "chown -R" in c.args[0]) assert "chown -R" in chown_call assert "alice:alice" in chown_call assert "/sessions/alice/mnt/work" in chown_call @@ -840,12 +853,22 @@ async def test_skips_already_mounted(self) -> None: patch.object(vm, "read_mounts", return_value=mounts), patch.object(vm, "shell", new_callable=AsyncMock) as mock_shell, ): - mock_shell.return_value = _ok() # mountpoint -q succeeds (already mounted) + + async def _shell_side_effect(command: str, *_: Any, **__: Any) -> CLIResult: + if command.startswith("wslpath -u") and "C:\\code" in command: + return _ok(stdout="/mnt/c/code") + if command == "mountpoint -q /mnt/code": + return _ok() + if command == "ls -A /mnt/code": + return _ok(stdout="existing-file") + return _ok() + + mock_shell.side_effect = _shell_side_effect await vm._apply_bind_mounts() - # Only the mountpoint check, no mount --bind call - assert mock_shell.await_count == 1 + # When already mounted and non-empty, bind step is skipped. + assert not any("mount --bind" in c.args[0] for c in mock_shell.call_args_list) async def test_empty_mounts_is_noop(self) -> None: with ( diff --git a/libs/hexagent_demo/backend/.gitignore b/libs/hexagent_demo/backend/.gitignore index bc7729de..be306a7f 100644 --- a/libs/hexagent_demo/backend/.gitignore +++ b/libs/hexagent_demo/backend/.gitignore @@ -43,6 +43,7 @@ uploads/ # Private skills (user-uploaded, runtime-generated) skills/private/ skills-inactive/ +skills/ # Bundled example/public skills (license restrictions) # skills/examples/ diff --git a/libs/hexagent_demo/backend/hexagent_api/routes/chat.py b/libs/hexagent_demo/backend/hexagent_api/routes/chat.py index 774df14f..401bb6be 100644 --- a/libs/hexagent_demo/backend/hexagent_api/routes/chat.py +++ b/libs/hexagent_demo/backend/hexagent_api/routes/chat.py @@ -12,6 +12,7 @@ import re import shlex import shutil +import sys import tempfile import time import urllib.parse @@ -748,20 +749,41 @@ def _trigger_preconvert(conversation_id: str, tool_output: str) -> None: "/Applications/LibreOffice.app/Contents/MacOS/soffice", # macOS "/usr/bin/soffice", # Linux "/usr/local/bin/soffice", # Linux (manual install) + os.path.join(os.environ.get("ProgramFiles", r"C:\Program Files"), "LibreOffice", "program", "soffice.exe"), # Windows + os.path.join(os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"), "LibreOffice", "program", "soffice.exe"), # Windows x86 path + os.path.join(os.environ.get("LOCALAPPDATA", r"C:\Users\Public\AppData\Local"), "Programs", "LibreOffice", "program", "soffice.exe"), # Windows per-user ] def _find_soffice() -> str | None: """Find the soffice binary on the host machine.""" - found = shutil.which("soffice") - if found: - return found + for binary in ("soffice", "soffice.exe"): + found = shutil.which(binary) + if found: + return found for p in _SOFFICE_SEARCH_PATHS: if os.path.isfile(p) and os.access(p, os.X_OK): return p return None +def _libreoffice_install_hint() -> str: + if os.name == "nt": + return ( + "Install it to enable presentation preview: " + "winget install TheDocumentFoundation.LibreOffice" + ) + if sys.platform == "darwin": + 
return ( + "Install it to enable presentation preview: " + "brew install --cask libreoffice" + ) + return ( + "Install it to enable presentation preview (for example: " + "sudo apt install libreoffice)." + ) + + def _cleanup_dir(dir_path: str) -> None: """Remove a temp directory and all its contents (best effort).""" shutil.rmtree(dir_path, ignore_errors=True) @@ -967,11 +989,7 @@ async def preview_office_file( if soffice_bin is None: raise HTTPException( status_code=422, - detail=( - "LibreOffice is not installed. " - "Install it to enable presentation preview: " - "brew install --cask libreoffice" - ), + detail=f"LibreOffice is not installed. {_libreoffice_install_hint()}", ) # Wait for any in-flight pre-conversion for this file diff --git a/libs/hexagent_demo/backend/hexagent_api/routes/setup.py b/libs/hexagent_demo/backend/hexagent_api/routes/setup.py index 3fc6d878..c8c398b2 100644 --- a/libs/hexagent_demo/backend/hexagent_api/routes/setup.py +++ b/libs/hexagent_demo/backend/hexagent_api/routes/setup.py @@ -358,14 +358,41 @@ def _wsl_prebuilt_tar_path() -> Path | None: """Return an offline WSL rootfs archive if present. Search order: - 1. Backend-bundled VM assets (PyInstaller ``sandbox/vm/wsl/prebuilt``) - 2. Electron extraResources path from ``HEXAGENT_WSL_OFFLINE_DIR`` (if set) + 1. Explicit file path from ``HEXAGENT_WSL_PREBUILT_TAR`` + 2. ``HEXAGENT_WSL_PREBUILT_DIR`` / ``HEXAGENT_APP_DIR`` / CWD + 3. Legacy bundled locations (for backward compatibility) """ - candidate_dirs: list[Path] = [vm_setup_dir().parent / "wsl" / "prebuilt"] + explicit_tar = os.environ.get("HEXAGENT_WSL_PREBUILT_TAR", "").strip() + if explicit_tar: + explicit_path = Path(explicit_tar) + if explicit_path.is_file(): + return explicit_path + + candidate_dirs: list[Path] = [] + seen: set[str] = set() + + def add_dir(raw: str) -> None: + raw = (raw or "").strip() + if not raw: + return + try: + p = Path(raw).resolve() + except Exception: # pragma: no cover - defensive + return + key = str(p).lower() + if key in seen: + return + seen.add(key) + candidate_dirs.append(p) + + # Preferred external locations for split package distribution. + add_dir(os.environ.get("HEXAGENT_WSL_PREBUILT_DIR", "")) + add_dir(os.environ.get("HEXAGENT_APP_DIR", "")) + add_dir(str(Path.cwd())) - offline_dir = os.environ.get("HEXAGENT_WSL_OFFLINE_DIR", "").strip() - if offline_dir: - candidate_dirs.append(Path(offline_dir)) + # Backward-compatible locations. + add_dir(str(vm_setup_dir().parent / "wsl" / "prebuilt")) + add_dir(os.environ.get("HEXAGENT_WSL_OFFLINE_DIR", "")) for prebuilt_dir in candidate_dirs: for name in _WSL_PREBUILT_CANDIDATES: @@ -1190,9 +1217,9 @@ async def _run_wsl(self) -> None: prebuilt_tar = _wsl_prebuilt_tar_path() import_dir = data_dir() / "wsl" / _WSL_INSTANCE / "disk" - # Distro does not exist: prefer bundled prebuilt HexAgent rootfs. + # Distro does not exist: prefer external prebuilt rootfs if available. 
if prebuilt_tar is not None: - self._emit("progress", {"step": "creating", "message": "Importing bundled HexAgent VM image..."}) + self._emit("progress", {"step": "creating", "message": f"Importing local HexAgent VM image ({prebuilt_tar.name})..."}) if import_dir.exists(): shutil.rmtree(import_dir, ignore_errors=True) import_dir.mkdir(parents=True, exist_ok=True) @@ -1206,12 +1233,12 @@ async def _run_wsl(self) -> None: _, err_b = await self._communicate_with_heartbeat( proc_import, step="creating", - message="Importing bundled HexAgent VM image...", + message="Importing local HexAgent VM image...", progress_info=lambda: f"(image ~{(prebuilt_tar.stat().st_size / (1024 * 1024)):.1f} MB)", ) if proc_import.returncode != 0: err = _decode_wsl_output(err_b or b"").strip() - self._emit("error", {"message": err or f"Bundled image import failed (exit {proc_import.returncode})"}) + self._emit("error", {"message": err or f"Local prebuilt image import failed (exit {proc_import.returncode})"}) self._status = "error" self._error = f"exit {proc_import.returncode}" return @@ -1226,7 +1253,7 @@ async def _run_wsl(self) -> None: retries_on_missing_disk=6, ) if ok: - self._emit("done", {"message": "WSL distro imported from bundled image and started successfully"}) + self._emit("done", {"message": "WSL distro imported from local prebuilt image and started successfully"}) self._status = "done" else: self._emit("error", {"message": err}) @@ -1234,7 +1261,7 @@ async def _run_wsl(self) -> None: self._error = err return - # Fallback: bootstrap from Ubuntu export. + # Fallback: bootstrap from network Ubuntu install/export. self._emit("progress", {"step": "creating", "message": "Preparing source distro (Ubuntu)..."}) entries = await _wsl_list() source_distro = _pick_wsl_source_distro(entries) diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt b/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt deleted file mode 100644 index 6d0c1da7..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt +++ /dev/null @@ -1,24 +0,0 @@ -MIT License - -Copyright (c) 2026 Mail-Master - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- ---- - diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md b/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md deleted file mode 100644 index 2970d846..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: email-mail-master 万能邮箱助手 -description: 通过阿里云邮箱、QQ邮箱或163邮箱等发送和接收邮件。支持发送普通邮件、带附件邮件、接收邮件、检查新邮件。当用户要求发送邮件、查看邮件、检查新邮件时使用。 - ---- - -# 邮件管理 - -通过阿里云邮箱、QQ邮箱或163邮箱等发送和接收邮件。 - -## 配置 - -编辑 `skills/email/scripts/config.json`,填写邮箱地址和授权码(非登录密码)。 - -授权码获取: -- QQ 邮箱:设置 > 账户 > 开启 IMAP/SMTP > 生成授权码 -- 163 邮箱:设置 > POP3/SMTP/IMAP > 开启服务 > 设置授权密码 - -可通过 `default_mailbox` 字段设置默认邮箱(`"qq"` 或 `"163"`)。 - -## 命令行调用 - -```bash -# 发送邮件 -python3 skills/email/scripts/mail.py send --to user@example.com --subject "主题" --content "内容" - -# 发送带附件 -python3 skills/email/scripts/mail.py send --to user@example.com --subject "报告" --content "请查收" --attach report.pdf - -# 接收最新邮件 -python3 skills/email/scripts/mail.py receive --limit 5 - -# 接收邮件(JSON 输出,推荐 AI 使用) -python3 skills/email/scripts/mail.py receive --limit 5 --json - -# 检查新邮件(最近 N 天) -python3 skills/email/scripts/mail.py check-new --since 1 - -# 检查新邮件(JSON 输出) -python3 skills/email/scripts/mail.py check-new --since 1 --json - -# 删除邮件(移到已删除文件夹,QQ邮箱可恢复) -python3 skills/email/scripts/mail.py delete --ids 123 - -# 批量删除 -python3 skills/email/scripts/mail.py delete --ids 123 124 125 - -# 彻底删除(不可恢复) -python3 skills/email/scripts/mail.py delete --ids 123 --permanent - -# 指定邮箱类型 -python3 skills/email/scripts/mail.py --mailbox 163 send --to user@example.com --subject "测试" -``` - -## 删除邮件说明 - -- QQ 邮箱(IMAP):默认移到「已删除」文件夹,可以从已删除中恢复。加 `--permanent` 彻底删除。 -- 163 邮箱(POP3):POP3 协议不支持文件夹操作,删除始终是永久的,不可恢复。 diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json b/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json deleted file mode 100644 index f1c5341d..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "ownerId": "kn7bb6tkjndgyzp6fh2vgvagq982ee7c", - "slug": "email-mail-master", - "version": "1.0.0", - "publishedAt": 1772888109581 -} \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py b/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py deleted file mode 100644 index 8bee782a..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py +++ /dev/null @@ -1,557 +0,0 @@ -"""邮箱管理核心模块""" -import imaplib -import poplib -import smtplib -import email -from email.header import decode_header, Header -from email.mime.text import MIMEText -from email.mime.multipart import MIMEMultipart -from datetime import datetime, timedelta -import json -import os -from typing import List, Dict, Optional - - -class EmailManager: - """邮箱管理器基类""" - - def __init__(self, email_address: str, password: str, - imap_server: str, imap_port: int, - smtp_server: str, smtp_port: int): - self.email_address = email_address - self.password = password - self.imap_server = imap_server - self.imap_port = imap_port - self.smtp_server = smtp_server - self.smtp_port = smtp_port - - def decode_str(self, s): - """解码邮件头部字符串""" - if s is None: - return "" - - value, charset = decode_header(s)[0] - if charset: - try: - value = value.decode(charset) - except: - try: - value = value.decode('utf-8', errors='ignore') - except: - value = str(value) - elif isinstance(value, bytes): - try: - value = value.decode('utf-8', 
errors='ignore')
-                except:
-                    value = str(value)
-        return value
-
-    def get_email_content(self, msg):
-        """获取邮件正文内容(仅纯文本,HTML 转纯文本)"""
-        import re
-        from html import unescape
-
-        content = ""
-
-        if msg.is_multipart():
-            # 优先查找 text/plain 部分
-            for part in msg.walk():
-                content_type = part.get_content_type()
-                if content_type == 'text/plain':
-                    try:
-                        payload = part.get_payload(decode=True)
-                        charset = part.get_content_charset() or 'utf-8'
-                        content = payload.decode(charset, errors='ignore')
-                        break
-                    except:
-                        pass
-
-            # 如果没有纯文本,尝试从 HTML 提取
-            if not content:
-                for part in msg.walk():
-                    content_type = part.get_content_type()
-                    if content_type == 'text/html':
-                        try:
-                            payload = part.get_payload(decode=True)
-                            charset = part.get_content_charset() or 'utf-8'
-                            html_content = payload.decode(charset, errors='ignore')
-                            content = self._html_to_text(html_content)
-                            break
-                        except:
-                            pass
-        else:
-            # 单部分邮件
-            try:
-                payload = msg.get_payload(decode=True)
-                charset = msg.get_content_charset() or 'utf-8'
-                raw_content = payload.decode(charset, errors='ignore')
-
-                # 根据 Content-Type 处理
-                if msg.get_content_type() == 'text/html':
-                    content = self._html_to_text(raw_content)
-                else:
-                    content = raw_content
-            except:
-                pass
-
-        return content.strip()
-
-    def _html_to_text(self, html_content: str) -> str:
-        """将 HTML 转换为纯文本"""
-        import re
-        from html import unescape
-
-        # 移除 script 和 style 标签及其内容
-        text = re.sub(r'<script[^>]*>.*?</script[^>]*>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'<style[^>]*>.*?</style[^>]*>', '', text, flags=re.DOTALL | re.IGNORECASE)
-
-        # 移除所有 HTML 标签
-        text = re.sub(r'<[^>]+>', ' ', text)
-
-        # 解码 HTML 实体
-        text = unescape(text)
-
-        # 清理多余空白
-        text = re.sub(r'\s+', ' ', text)
-        text = re.sub(r'\n\s*\n', '\n', text)
-
-        return text.strip()
-
-    def receive_emails(self, mailbox: str = 'INBOX', limit: int = 10) -> List[Dict]:
-        """接收邮件"""
-        try:
-            mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port)
-            mail.login(self.email_address, self.password)
-            mail.select(mailbox)
-
-            status, messages = mail.search(None, 'ALL')
-            email_ids = messages[0].split()
-
-            emails = []
-            for email_id in email_ids[-limit:]:
-                status, msg_data = mail.fetch(email_id, '(RFC822)')
-
-                for response_part in msg_data:
-                    if isinstance(response_part, tuple):
-                        msg = email.message_from_bytes(response_part[1])
-
-                        subject = self.decode_str(msg.get('Subject', ''))
-                        from_ = self.decode_str(msg.get('From', ''))
-                        date = msg.get('Date', '')
-                        content = self.get_email_content(msg)
-
-                        emails.append({
-                            'id': email_id.decode(),
-                            'subject': subject,
-                            'from': from_,
-                            'date': date,
-                            'content': content[:200] + '...'
if len(content) > 200 else content - }) - - mail.close() - mail.logout() - - return emails - - except Exception as e: - raise Exception(f"接收邮件失败: {str(e)}") - - def receive_emails_since(self, since_date: datetime, mailbox: str = 'INBOX') -> List[Dict]: - """接收指定日期之后的邮件""" - try: - from email.utils import parsedate_to_datetime - - mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port) - mail.login(self.email_address, self.password) - mail.select(mailbox) - - # IMAP SINCE 精度只到天,需要客户端二次过滤 - date_str = since_date.strftime('%d-%b-%Y') - status, messages = mail.search(None, f'(SINCE {date_str})') - email_ids = messages[0].split() - - emails = [] - for email_id in email_ids: - status, msg_data = mail.fetch(email_id, '(RFC822)') - - for response_part in msg_data: - if isinstance(response_part, tuple): - msg = email.message_from_bytes(response_part[1]) - - # 客户端精确过滤:解析邮件日期,跳过早于 since_date 的 - raw_date = msg.get('Date', '') - try: - email_dt = parsedate_to_datetime(raw_date) - # 统一为 naive datetime 比较(去掉时区信息) - if email_dt.tzinfo: - email_dt = email_dt.replace(tzinfo=None) - if email_dt < since_date: - continue - except Exception: - pass # 无法解析日期的邮件仍然保留 - - subject = self.decode_str(msg.get('Subject', '')) - from_ = self.decode_str(msg.get('From', '')) - content = self.get_email_content(msg) - - emails.append({ - 'id': email_id.decode(), - 'subject': subject, - 'from': from_, - 'date': raw_date, - 'content': content[:200] + '...' if len(content) > 200 else content - }) - - mail.close() - mail.logout() - - return emails - - except Exception as e: - raise Exception(f"接收邮件失败: {str(e)}") - - def send_email(self, to_addr: str, subject: str, content: str, - content_type: str = 'plain', attachments: List[str] = None) -> str: - """发送邮件 - - Args: - to_addr: 收件人邮箱 - subject: 邮件主题 - content: 邮件内容 - content_type: 内容类型 ('plain' 或 'html') - attachments: 附件文件路径列表 - - Returns: - 发送结果消息 - """ - try: - from email.mime.application import MIMEApplication - import os - - message = MIMEMultipart() - message['From'] = Header(self.email_address) - message['To'] = Header(to_addr) - message['Subject'] = Header(subject, 'utf-8') - - # 添加邮件正文 - message.attach(MIMEText(content, content_type, 'utf-8')) - - # 添加附件 - if attachments: - for file_path in attachments: - if not os.path.exists(file_path): - raise FileNotFoundError(f"附件文件不存在: {file_path}") - - with open(file_path, 'rb') as f: - attachment = MIMEApplication(f.read()) - filename = os.path.basename(file_path) - attachment.add_header( - 'Content-Disposition', - 'attachment', - filename=('utf-8', '', filename) - ) - message.attach(attachment) - - server = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port) - server.login(self.email_address, self.password) - server.sendmail(self.email_address, [to_addr], message.as_string()) - server.quit() - - result = "邮件发送成功!" 
- if attachments: - result += f" (包含 {len(attachments)} 个附件)" - return result - - except Exception as e: - raise Exception(f"发送邮件失败: {str(e)}") - - def delete_email(self, email_id: str, mailbox: str = 'INBOX', permanent: bool = False) -> str: - """删除邮件(IMAP) - - 默认移到「已删除」文件夹(可在30天内恢复),permanent=True 则彻底删除(不可恢复)。 - - Args: - email_id: 邮件 ID(receive_emails 返回的 id 字段) - mailbox: 邮箱文件夹,默认 INBOX - permanent: 是否彻底删除(expunge) - - False: 移到"已删除"文件夹(可恢复) - - True: 彻底删除(不可恢复) - - Returns: - 操作结果消息 - - 注意: - - QQ邮箱: permanent=False 时邮件移到"已删除"文件夹,可在30天内恢复 - - 163邮箱: 使用POP3协议,删除操作始终是永久的 - """ - try: - mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port) - mail.login(self.email_address, self.password) - mail.select(mailbox) - - # 标记为删除 - mail.store(email_id.encode(), '+FLAGS', '\\Deleted') - - if permanent: - mail.expunge() - result = f"✓ 邮件 {email_id} 已彻底删除(不可恢复)" - else: - result = f"✓ 邮件 {email_id} 已移到已删除文件夹(可在30天内从已删除文件夹恢复)" - - mail.close() - mail.logout() - return result - - except Exception as e: - raise Exception(f"删除邮件失败: {str(e)}") - - def delete_emails_batch(self, email_ids: List[str], mailbox: str = 'INBOX', permanent: bool = False) -> str: - """批量删除邮件(IMAP) - - Args: - email_ids: 邮件 ID 列表 - mailbox: 邮箱文件夹,默认 INBOX - permanent: 是否彻底删除 - - False: 标记为删除,移到"已删除"文件夹(可恢复) - - True: 彻底删除,立即从服务器移除(不可恢复) - - Returns: - 操作结果消息 - - 注意: - - QQ邮箱: permanent=False 时邮件移到"已删除"文件夹,可在30天内恢复 - - 163邮箱: 使用POP3协议,删除操作始终是永久的 - """ - try: - mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port) - mail.login(self.email_address, self.password) - mail.select(mailbox) - - # 批量标记为删除 - for eid in email_ids: - mail.store(eid.encode(), '+FLAGS', '\\Deleted') - - if permanent: - mail.expunge() - action = "彻底删除(不可恢复)" - else: - action = "移到已删除文件夹(可在30天内恢复)" - - mail.close() - mail.logout() - - return f"✓ 已{action} {len(email_ids)} 封邮件" - - except Exception as e: - raise Exception(f"批量删除邮件失败: {str(e)}") - - -class QQEmailManager(EmailManager): - """QQ 邮箱管理器 - 使用 IMAP""" - - def __init__(self, email_address: str, auth_code: str): - super().__init__( - email_address=email_address, - password=auth_code, - imap_server='imap.qq.com', - imap_port=993, - smtp_server='smtp.qq.com', - smtp_port=465 - ) - - -class Email163Manager(EmailManager): - """163 邮箱管理器 - 使用 POP3(因为 IMAP 有安全限制)""" - - def __init__(self, email_address: str, auth_password: str): - super().__init__( - email_address=email_address, - password=auth_password, - imap_server='pop.163.com', # 使用 POP3 - imap_port=995, - smtp_server='smtp.163.com', - smtp_port=465 - ) - self.pop_server = 'pop.163.com' - self.pop_port = 995 - - def receive_emails(self, mailbox: str = 'INBOX', limit: int = 10) -> List[Dict]: - """接收邮件 - 使用 POP3""" - try: - # 连接 POP3 - pop = poplib.POP3_SSL(self.pop_server, self.pop_port) - pop.user(self.email_address) - pop.pass_(self.password) - - # 获取邮件数量 - num_messages = len(pop.list()[1]) - - emails = [] - # 获取最新的 limit 封邮件 - start = max(1, num_messages - limit + 1) - for i in range(start, num_messages + 1): - try: - response, lines, octets = pop.retr(i) - - # 解析邮件 - msg_content = b'\r\n'.join(lines) - msg = email.message_from_bytes(msg_content) - - subject = self.decode_str(msg.get('Subject', '')) - from_ = self.decode_str(msg.get('From', '')) - date = msg.get('Date', '') - content = self.get_email_content(msg) - - emails.append({ - 'id': str(i), - 'subject': subject, - 'from': from_, - 'date': date, - 'content': content - }) - except Exception as e: - # 跳过无法解析的邮件 - continue - - pop.quit() - return emails - - except Exception as e: - raise 
Exception(f"接收邮件失败: {str(e)}") - - def receive_emails_since(self, since_date: datetime, mailbox: str = 'INBOX') -> List[Dict]: - """接收指定日期之后的邮件 - 使用 POP3""" - try: - # 连接 POP3 - pop = poplib.POP3_SSL(self.pop_server, self.pop_port) - pop.user(self.email_address) - pop.pass_(self.password) - - # 获取邮件数量 - num_messages = len(pop.list()[1]) - - emails = [] - # 从最新的邮件开始检查 - for i in range(num_messages, 0, -1): - try: - response, lines, octets = pop.retr(i) - - # 解析邮件 - msg_content = b'\r\n'.join(lines) - msg = email.message_from_bytes(msg_content) - - # 解析日期 - date_str = msg.get('Date', '') - try: - from email.utils import parsedate_to_datetime - email_date = parsedate_to_datetime(date_str) - - # 如果邮件日期早于指定日期,停止检查 - if email_date < since_date: - break - except: - # 如果无法解析日期,跳过 - continue - - subject = self.decode_str(msg.get('Subject', '')) - from_ = self.decode_str(msg.get('From', '')) - content = self.get_email_content(msg) - - emails.append({ - 'id': str(i), - 'subject': subject, - 'from': from_, - 'date': date_str, - 'content': content - }) - except Exception as e: - # 跳过无法解析的邮件 - continue - - pop.quit() - # 反转列表,使最新的邮件在最后 - return list(reversed(emails)) - - except Exception as e: - raise Exception(f"接收邮件失败: {str(e)}") - - def delete_email(self, email_id: str, mailbox: str = 'INBOX', permanent: bool = False) -> str: - """删除邮件(POP3) - - 注意:POP3 协议的删除始终是永久的,不可恢复。 - permanent 参数对 POP3 无效,仅为保持接口一致性。 - """ - try: - pop = poplib.POP3_SSL(self.pop_server, self.pop_port) - pop.user(self.email_address) - pop.pass_(self.password) - - pop.dele(int(email_id)) - pop.quit() - - return f"✓ 邮件 {email_id} 已永久删除(POP3协议不支持恢复,已从服务器移除)" - - except Exception as e: - raise Exception(f"删除邮件失败: {str(e)}") - - def delete_emails_batch(self, email_ids: List[str], mailbox: str = 'INBOX', permanent: bool = False) -> str: - """批量删除邮件(POP3) - - 注意:POP3 协议的删除始终是永久的,不可恢复。 - permanent 参数对 POP3 无效,仅为保持接口一致性。 - """ - try: - pop = poplib.POP3_SSL(self.pop_server, self.pop_port) - pop.user(self.email_address) - pop.pass_(self.password) - - for eid in email_ids: - pop.dele(int(eid)) - - pop.quit() - return f"✓ 已永久删除 {len(email_ids)} 封邮件(POP3协议不支持恢复,已从服务器移除)" - - except Exception as e: - raise Exception(f"批量删除邮件失败: {str(e)}") - - -def load_config(config_path: str = None) -> Dict: - """加载配置文件""" - if config_path is None: - config_path = os.path.join(os.path.dirname(__file__), 'config.json') - - if not os.path.exists(config_path): - raise FileNotFoundError( - f"配置文件不存在: {config_path}\n" - f"请复制 config.json.example 为 config.json 并填写您的邮箱信息" - ) - - with open(config_path, 'r', encoding='utf-8') as f: - return json.load(f) - - -def save_config(config: Dict, config_path: str = None): - """保存配置文件""" - if config_path is None: - config_path = os.path.join(os.path.dirname(__file__), 'config.json') - - with open(config_path, 'w', encoding='utf-8') as f: - json.dump(config, f, ensure_ascii=False, indent=2) - - -def get_email_manager(email_type: str, config: Dict) -> EmailManager: - """根据邮箱类型获取管理器""" - if email_type == 'qq': - email_config = config['qq_email'] - return QQEmailManager( - email_address=email_config['email'], - auth_code=email_config['auth_code'] - ) - elif email_type == '163': - email_config = config['163_email'] - return Email163Manager( - email_address=email_config['email'], - auth_password=email_config['auth_password'] - ) - else: - raise ValueError(f"不支持的邮箱类型: {email_type},请使用 'qq' 或 '163'") diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/mail.py 
b/libs/hexagent_demo/backend/skills/email-mail-master/scripts/mail.py deleted file mode 100644 index dc7a8b53..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/mail.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python3 -"""非交互式邮件管理脚本""" -import argparse -import json -import sys -from datetime import datetime, timedelta -from email_manager import load_config, get_email_manager - - -def cmd_send(args): - """发送邮件""" - try: - config = load_config() - manager = get_email_manager(args.mailbox, config) - - attachments = args.attach if args.attach else None - - manager.send_email( - to_addr=args.to, - subject=args.subject, - content=args.content, - attachments=attachments - ) - - print(f"邮件已发送到 {args.to}") - if attachments: - print(f" 附件: {', '.join(attachments)}") - - except Exception as e: - print(f"发送失败: {e}", file=sys.stderr) - sys.exit(1) - - -def cmd_receive(args): - """接收邮件""" - try: - config = load_config() - manager = get_email_manager(args.mailbox, config) - - emails = manager.receive_emails(limit=args.limit) - - if not emails: - if args.json: - print(json.dumps({"count": 0, "emails": []}, ensure_ascii=False)) - else: - print("收件箱为空") - return - - if args.json: - print(json.dumps({ - "count": len(emails), - "emails": emails - }, ensure_ascii=False, indent=2)) - return - - print(f"收到 {len(emails)} 封邮件:\n") - for i, e in enumerate(emails, 1): - print(f"[{i}] {e['subject']}") - print(f" 发件人: {e['from']}") - print(f" 日期: {e['date']}") - print(f" 内容: {e['content'][:100]}...") - print() - - except Exception as e: - print(f"接收失败: {e}", file=sys.stderr) - sys.exit(1) - - -def cmd_check_new(args): - """检查新邮件""" - try: - config = load_config() - manager = get_email_manager(args.mailbox, config) - - since_date = datetime.now() - timedelta(days=args.since) - new_emails = manager.receive_emails_since(since_date) - - if not new_emails: - if args.json: - print(json.dumps({"count": 0, "since_days": args.since, "emails": []}, ensure_ascii=False)) - else: - print(f"没有新邮件(最近 {args.since} 天)") - return - - if args.json: - print(json.dumps({ - "count": len(new_emails), - "since_days": args.since, - "emails": [ - {"subject": e['subject'], "from": e['from'], "date": e['date']} - for e in new_emails - ] - }, ensure_ascii=False, indent=2)) - return - - print(f"找到 {len(new_emails)} 封新邮件(最近 {args.since} 天):\n") - for i, e in enumerate(new_emails, 1): - print(f"[{i}] {e['subject']}") - print(f" 发件人: {e['from']}") - print(f" 日期: {e['date']}") - print() - - except Exception as e: - print(f"检查失败: {e}", file=sys.stderr) - sys.exit(1) - - -def cmd_delete(args): - """删除邮件""" - try: - config = load_config() - manager = get_email_manager(args.mailbox, config) - - email_ids = args.ids - - if len(email_ids) == 1: - result = manager.delete_email(email_ids[0], permanent=args.permanent) - else: - result = manager.delete_emails_batch(email_ids, permanent=args.permanent) - - print(result) - - except Exception as e: - print(f"删除失败: {e}", file=sys.stderr) - sys.exit(1) - - -def main(): - # 加载配置以获取默认邮箱 - try: - config = load_config() - default_mailbox = config.get('default_mailbox', 'qq') - except: - default_mailbox = 'qq' - - parser = argparse.ArgumentParser( - description='邮件管理工具', - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -示例: - # 发送邮件(使用默认邮箱) - %(prog)s send --to user@example.com --subject "Hello" --content "Test" - - # 发送带附件 - %(prog)s send --to user@example.com --subject "Report" --content "See file" --attach report.pdf - - # 接收最新 5 封邮件 - %(prog)s receive --limit 5 - - 
# 检查最近 2 天的新邮件 - %(prog)s check-new --since 2 - - # 删除邮件(移到已删除文件夹,可在30天内恢复) - %(prog)s delete --ids 123 - - # 批量删除(移到已删除文件夹) - %(prog)s delete --ids 123 124 125 - - # 彻底删除(不可恢复,立即从服务器移除) - %(prog)s delete --ids 123 --permanent - - # 批量彻底删除 - %(prog)s delete --ids 123 124 125 --permanent - - # 使用 163 邮箱 - %(prog)s send --mailbox 163 --to user@example.com --subject "Test" -""" - ) - - parser.add_argument( - '--mailbox', - choices=['qq', '163'], - default=default_mailbox, - help=f'邮箱类型 (默认: {default_mailbox},可在 config.json 中修改 default_mailbox)' - ) - - subparsers = parser.add_subparsers(dest='command', help='命令') - - # send 命令 - send_parser = subparsers.add_parser('send', help='发送邮件') - send_parser.add_argument('--to', required=True, help='收件人邮箱') - send_parser.add_argument('--subject', required=True, help='邮件主题') - send_parser.add_argument('--content', required=True, help='邮件内容') - send_parser.add_argument('--attach', nargs='+', help='附件文件路径') - - # receive 命令 - receive_parser = subparsers.add_parser('receive', help='接收邮件') - receive_parser.add_argument('--limit', type=int, default=10, help='接收数量 (默认: 10)') - receive_parser.add_argument('--json', action='store_true', help='JSON 格式输出') - - # check-new 命令 - check_parser = subparsers.add_parser('check-new', help='检查新邮件') - check_parser.add_argument('--since', type=int, default=1, help='检查最近 N 天 (默认: 1)') - check_parser.add_argument('--json', action='store_true', help='JSON 格式输出') - - # delete 命令 - delete_parser = subparsers.add_parser('delete', help='删除邮件') - delete_parser.add_argument('--ids', nargs='+', required=True, - help='要删除的邮件 ID(可指定多个,用空格分隔)') - delete_parser.add_argument('--permanent', action='store_true', - help='彻底删除(不可恢复)。不指定此参数时,邮件将移到已删除文件夹,可在30天内恢复。注意:163邮箱使用POP3协议,删除始终是永久的') - - args = parser.parse_args() - - if not args.command: - parser.print_help() - sys.exit(1) - - # 执行命令 - if args.command == 'send': - cmd_send(args) - elif args.command == 'receive': - cmd_receive(args) - elif args.command == 'check-new': - cmd_check_new(args) - elif args.command == 'delete': - cmd_delete(args) - - -if __name__ == '__main__': - main() diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/requirements.txt b/libs/hexagent_demo/backend/skills/email-mail-master/scripts/requirements.txt deleted file mode 100644 index 47365ab0..00000000 --- a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -# 邮箱管理所需的 Python 包 -# 注意: imaplib, smtplib, email 是 Python 标准库,无需安装 diff --git a/libs/hexagent_demo/backend/skills/examples/data-insight-report/SKILL.md b/libs/hexagent_demo/backend/skills/examples/data-insight-report/SKILL.md deleted file mode 100644 index 156c6e10..00000000 --- a/libs/hexagent_demo/backend/skills/examples/data-insight-report/SKILL.md +++ /dev/null @@ -1,965 +0,0 @@ ---- -name: data-insight-report -description: | - Analyze tabular data (Excel, CSV, TSV, etc.) and generate insightful ECharts visualizations embedded in comprehensive data insight reports. - Use when: analyzing spreadsheet data for insights, creating data visualization dashboards, generating interactive chart reports, - building presentation-ready data insight reports, or when user needs to transform tabular data into actionable insights with visualizations. -license: MIT -metadata: - author: Andy Huang - version: "1.3.0" ---- - -# Data Insight Report Generator - -Expert **Data Analyst & Visualization Specialist** that analyzes tabular data (Excel, CSV, TSV, etc.) 
and generates comprehensive data insight reports with interactive ECharts visualizations and actionable recommendations. - -## When to Use - -- Analyzing Excel/CSV/TSV data for business insights -- Creating data visualization dashboards from spreadsheets -- Generating interactive chart reports for stakeholders -- Exploring and presenting data insights visually -- Building presentation-ready data insight reports with embedded charts -- Transforming raw tabular data into actionable business recommendations - -## Supported Data Formats - -| Format | Extensions | Description | -|--------|------------|-------------| -| Excel | `.xlsx`, `.xlsm`, `.xls` | Microsoft Excel workbooks | -| CSV | `.csv` | Comma-separated values | -| TSV | `.tsv`, `.tab` | Tab-separated values | -| Other | `.ods` | OpenDocument Spreadsheet | - ---- - -## Capability Assessment - Access to Dedicated Documents - -Based on the decomposed requirements, explicitly determine whether it is necessary to read the following specialized capability documents: - -| Capability | When to Read | Document Path | -|------------|--------------|---------------| -| **data-analysis** | When performing ANY data analysis: SQL queries, pandas operations, statistics, data cleaning, transformations, pivot tables, time series, or data exploration | **Must** read `abilities/data-analysis.md` | -| **echarts-validation** | When validating ECharts chart configurations after generation, fixing chart errors, or understanding validation rules | **Must** read `abilities/echarts-validation.md` | - -### Decision Flow - -``` -User Request - │ - ├─► Need to load/inspect data files? ──► Read abilities/data-analysis.md - │ - ├─► Need SQL queries or pandas analysis? ──► Read abilities/data-analysis.md - │ - ├─► Need statistical summaries or correlations? ──► Read abilities/data-analysis.md - │ - ├─► Need data cleaning or transformations? ──► Read abilities/data-analysis.md - │ - ├─► Need to validate chart config? ──► Read abilities/echarts-validation.md - │ - └─► Only need chart templates/report format? ──► Continue with current document -``` - ---- - -## Directory Structure - -``` -data-insight-report/ -├── SKILL.md # Main skill document (this file) -├── abilities/ -│ ├── data-analysis.md # Data analysis capability (SQL + pandas + statistics) -│ └── echarts-validation.md # ECharts validation capability -└── scripts/ - ├── analyze.py # SQL-based data analysis script (DuckDB) - └── validate_echarts.py # ECharts configuration validator -``` - ---- - -## Core Capabilities - -### 1. Data Analysis (via data-analysis ability) - -**Two complementary analysis methods:** - -#### SQL Analysis (Script-Based) -- Quick data inspection via `scripts/analyze.py` -- Schema inspection, data profiling, statistical summaries -- Complex queries with JOINs, CTEs, window functions -- Export to CSV/JSON/Markdown - -#### pandas Analysis (Code-Based) -- Flexible, programmatic data manipulation -- Advanced data cleaning and transformations -- Time series analysis -- Statistical analysis (correlations, hypothesis testing, trends) -- Custom data processing pipelines - -**When to use which:** - -| Use SQL Script when... | Use pandas when... 
| -|----------------------|-------------------| -| Quick data inspection | Complex transformations | -| Simple aggregations | Custom statistical analysis | -| Large datasets (>100MB) | Iterative exploration | -| One-time queries | Chained operations | -| Direct file export | Conditional logic | - -**To use this capability, read `abilities/data-analysis.md` for detailed instructions.** - -### 2. ECharts Validation (via echarts-validation ability) - -- Automated JSON syntax validation -- ECharts structure validation (required fields, chart types) -- Data integrity validation (length matching, NaN/Infinity detection) -- Layout validation (best practices, performance warnings) - -**To use this capability, read `abilities/echarts-validation.md` for detailed instructions.** - -### 3. ECharts Visualization Generation - -Generate interactive charts using `skills(echart)` embedded directly in Markdown. - -**Chart Quantity Limitation:** -- **CRITICAL**: Generate **4-7 charts maximum** per report -- Focus on the most relevant, high-impact visualizations that directly address the user's question -- Avoid creating tangentially related charts that drift from the core analysis purpose -- Quality over quantity: each chart must provide clear, actionable insight - -**Supported Chart Types:** -- **Line**: Trend analysis, time-series, multi-series comparison -- **Bar**: Categorical ranking, grouped/stacked comparisons -- **Scatter**: Correlation analysis, bubble charts, distribution patterns -- **Pie/Donut**: Composition, market share, proportional breakdown -- **Heatmap**: Correlation matrices, density patterns -- **Radar**: Multi-dimensional performance profiles -- **Funnel**: Conversion rates, process flow -- **Treemap**: Hierarchical part-to-whole relationships -- **Gauge**: KPI indicators, single metrics -- **Box Plot**: Statistical distributions, outlier detection -- **Candlestick**: Financial OHLC analysis - -### 4. Insight Report Generation - -Each report contains: -- **Executive Summary**: Key findings and recommendations overview -- **Data Profile**: Dataset characteristics and quality assessment -- **Key Insights**: Actionable findings with supporting visualizations -- **Chart Sections**: Interactive ECharts + detailed insights per chart -- **Recommendations**: Data-driven action items -- **Methodology Notes**: Analysis approach and assumptions - ---- - -## CRITICAL WORKFLOW - MUST FOLLOW - -### Chart Generation Protocol (MANDATORY) - -**IMPORTANT: You MUST generate charts ONE BY ONE. Never generate all charts at once.** - -The workflow is strictly sequential: - -``` -Data Loading → Analysis → Insight Planning - │ - ├─► Read abilities/data-analysis.md if needed - │ - ▼ -Chart 1 → Generate → Validate (scripts/validate_echarts.py) → Fix (if needed) → Confirm - ↓ -Chart 2 → Generate → Validate → Fix (if needed) → Confirm - ↓ -Chart 3 → Generate → Validate → Fix (if needed) → Confirm - ↓ -... (continue for each chart) - ↓ -Final Insight Report → data_insight_report.md -``` - ---- - -## Phase 1: Data Loading & Profiling - -### Step 1.1: Read Data Analysis Capability - -**CRITICAL: Before performing data analysis, read the capability document:** - -```markdown -Read `abilities/data-analysis.md` for: -- Script usage and parameters -- SQL query patterns -- Statistical summary generation -- Data export options -``` - -### Step 1.2: Load Data - -``` -1. Detect file format based on extension -2. 
Use scripts/analyze.py to load and inspect data: - python scripts/analyze.py --files /path/to/data.xlsx --action inspect -3. Parse and validate data structure -``` - -### Step 1.3: Profile Data - -Generate a data profile including: -- **Basic Info**: Row count, column count, file size -- **Column Analysis**: Data types, unique values, missing values per column -- **Data Quality**: Duplicate rows, null percentages, data type consistency -- **Statistical Summary**: For numeric columns (min, max, mean, median, std) -- **Sample Data**: First/last few rows for visual confirmation - -**Use the analysis script:** -```bash -python scripts/analyze.py --files /path/to/data.xlsx --action summary --table Sheet1 -``` - -### Step 1.4: Data Cleaning - -Execute cleaning based on profile: -- Handle missing values (impute or remove) -- Remove duplicate records -- Convert data types as needed -- Detect and handle outliers -- Normalize text fields - ---- - -## Phase 2: Insight Planning - -1. Execute statistical analysis using `scripts/analyze.py`: - - Descriptive statistics (mean, median, std, distribution) - - Trend analysis (growth rates, seasonality, forecasts) - - Correlation analysis (variable relationships) - - Segmentation (group comparisons, rankings) - -2. Plan 4-7 key charts to generate (create a plan, do NOT generate yet) - -3. **Create an Insight Plan** - Document before generating: - -```markdown -## 数据洞察计划 - -### 数据概况 -- 总记录数: X -- 字段数: Y -- 时间范围: [如适用] -- 关键指标: [列出核心指标] - -### 洞察方向 -1. [洞察方向1 - 如: 销售趋势分析] -2. [洞察方向2 - 如: 产品类别表现] -3. [洞察方向3 - 如: 区域分布对比] -... - -### 图表规划 - -| 序号 | 图表类型 | 图表标题 | 数据来源 | 核心洞察 | 业务价值 | -|------|----------|----------|----------|----------|----------| -| 1 | line | 销售额趋势 | 字段A, 字段B | 月度销售趋势 | 识别增长/下滑周期 | -| 2 | pie | 产品占比 | 字段C, 字段D | 产品销售占比 | 资源分配决策 | -| ... | ... | ... | ... | ... | ... 
| -``` - ---- - -## Phase 3: Sequential Chart Generation (STEP BY STEP) - -### Step 3.1: Generate Chart 1 - -**Output to file**: `output_fs/charts/chart_01.md` - -```markdown -# 图表 1: [图表标题] - -## 可视化 - -```echarts -{ - "title": { "text": "[标题]" }, - "tooltip": { "trigger": "axis" }, - "legend": { "data": ["系列名称"] }, - "xAxis": { "type": "category", "data": ["分类1", "分类2"] }, - "yAxis": { "type": "value" }, - "series": [ - { "name": "系列名称", "type": "bar", "data": [100, 200] } - ] -} -``` - -## 数据洞察 - -### 关键发现 -- **发现1**: 描述数据中的关键模式 -- **发现2**: 解释趋势或异常 -- **发现3**: 对比分析结论 - -### 业务影响 -- [业务影响描述] - -### 行动建议 -- [基于数据的可执行建议] - -## 数据来源 - -- 原始数据: [文件名/Sheet名称] -- 数据范围: [具体字段或筛选条件] -- 数据处理: [如: 按月聚合/过滤异常值等] -``` - -### Step 3.2: Validate Chart 1 (MANDATORY - Use Validation Script) - -**CRITICAL: After generating each chart, you MUST run the validation script.** - -```bash -python scripts/validate_echarts.py output_fs/charts/chart_01.md -``` - -#### Validation Output Interpretation - -The script outputs a structured report: - -```markdown -## 校验报告 - [图表标题] - -### JSON语法校验 -- ✅/❌ JSON解析: [状态] -- ✅/❌ 引号检查: [状态] -- ✅/❌ 尾逗号检查: [状态] -- ✅/❌ 函数检查: [状态] - -### ECharts结构校验 -- ✅/❌ title字段: [状态] -- ✅/❌ tooltip字段: [状态] -- ✅/❌ series字段: [状态] -- ✅/❌ series[0].type: [状态] -- ✅/❌ series[0].data: [状态] - -### 数据完整性校验 -- ✅/❌ 数据长度匹配: [状态] -- ✅/❌ NaN检查: [状态] -- ✅/❌ Infinity检查: [状态] -- ✅/❌ 数值类型检查: [状态] - -### 布局校验 -- ✅/❌ grid配置: [状态] -- ✅/❌ 标题长度: [状态] -- ✅/❌ 图例项数: [状态] - -### 校验结果 -✅ 通过 / ❌ 失败 - X 个错误, Y 个警告 -``` - -#### Validation Rules - -| Category | Check | Level | Description | -|----------|-------|-------|-------------| -| JSON语法 | JSON解析 | ERROR | Valid JSON syntax | -| JSON语法 | 引号检查 | ERROR | Must use double quotes | -| JSON语法 | 尾逗号检查 | ERROR | No trailing commas | -| JSON语法 | 函数检查 | ERROR | No JavaScript functions | -| ECharts结构 | series字段 | ERROR | Required, non-empty array | -| ECharts结构 | series[].type | ERROR | Valid chart type | -| ECharts结构 | series[].data | ERROR | Must exist and be array | -| 数据完整性 | 数据长度匹配 | ERROR | xAxis/series data length match | -| 数据完整性 | NaN检查 | ERROR | No NaN values | -| 数据完整性 | Infinity检查 | ERROR | No Infinity values | -| 布局 | grid配置 | WARNING | Avoid complex positioning | -| 布局 | 标题长度 | WARNING | Under 30 characters | -| 布局 | 图例项数 | WARNING | Under 10 items | - -### Step 3.3: Fix Issues (If Validation Fails) - -**If the validation script reports errors, STOP and FIX immediately.** - -Read `abilities/echarts-validation.md` for detailed fix instructions. 
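The validator exits non-zero while ERROR-level issues remain (see the exit-code table in `abilities/echarts-validation.md`), so the fix-and-recheck cycle can be driven from the shell. A minimal sketch, assuming the standard chart path used above:

```bash
# Re-run validation until the chart passes (exit code 0).
# Edit output_fs/charts/chart_01.md between attempts.
until python scripts/validate_echarts.py output_fs/charts/chart_01.md; do
  echo "Validation failed - fix the reported errors, then press Enter to re-check"
  read -r
done
echo "Chart 1 passed validation"
```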
- -#### Quick Fix Reference - -**Fix 1: JavaScript Functions → String Templates** -```javascript -// ❌ WRONG -"tooltip": { "formatter": function(params) { return params[0].name; } } - -// ✅ CORRECT -"tooltip": { "formatter": "{b}: {c}" } -``` - -**Fix 2: Single Quotes → Double Quotes** -```javascript -// ❌ WRONG -{ 'name': 'Sales', 'type': 'bar' } - -// ✅ CORRECT -{ "name": "Sales", "type": "bar" } -``` - -**Fix 3: Trailing Commas** -```javascript -// ❌ WRONG -"data": [1, 2, 3,], -"series": [{ "name": "A" },] - -// ✅ CORRECT -"data": [1, 2, 3], -"series": [{ "name": "A" }] -``` - -**Fix 4: Data Length Mismatch** -```javascript -// ❌ WRONG: 3 categories but 4 data points -"xAxis": { "data": ["A", "B", "C"] }, -"series": [{ "data": [10, 20, 30, 40] }] - -// ✅ CORRECT: Align data -"xAxis": { "data": ["A", "B", "C", "D"] }, -"series": [{ "data": [10, 20, 30, 40] }] -``` - -**Fix 5: Layout Issues** -```javascript -// ❌ WRONG: Overly complex manual positioning -"grid": { "left": "3%", "right": "4%", "bottom": "3%", "top": "15%" } - -// ✅ CORRECT: Let ECharts handle it -"grid": { "containLabel": true } -``` - -**Fix 6: String Numbers** -```javascript -// ❌ WRONG: Numbers as strings -"data": ["100", "200", "300"] - -// ✅ CORRECT: Actual numbers -"data": [100, 200, 300] -``` - -### Step 3.4: Re-validate After Fix - -After fixing, run the validation script again: - -```bash -python scripts/validate_echarts.py output_fs/charts/chart_01.md -``` - -Repeat until validation passes with `✅ 通过`. - -### Step 3.5: Confirm and Proceed - -After validation passes, record success and proceed to next chart: - -```markdown -## 图表 1 完成 ✅ -文件: output_fs/charts/chart_01.md -状态: 已校验通过 ---- -继续生成图表 2... -``` - -### Step 3.6: Repeat for Each Chart - -Repeat steps 3.1-3.5 for each planned chart: -- Chart 2 → `output_fs/charts/chart_02.md` -- Chart 3 → `output_fs/charts/chart_03.md` -- ... -- Chart N → `output_fs/charts/chart_N.md` - ---- - -## Phase 4: Final Insight Report Generation - -After ALL charts are generated and validated individually: - -### Step 4.1: Verify All Charts - -Run validation on all charts: - -```bash -python scripts/validate_echarts.py output_fs/charts/chart_01.md -python scripts/validate_echarts.py output_fs/charts/chart_02.md -python scripts/validate_echarts.py output_fs/charts/chart_03.md -# ... etc -``` - -Create status summary: - -```markdown -## 图表完成状态汇总 - -| 图表 | 文件 | 状态 | -|------|------|------| -| 图表 1 | chart_01.md | ✅ 通过 | -| 图表 2 | chart_02.md | ✅ 通过 | -| 图表 3 | chart_03.md | ✅ 通过 | -| ... | ... | ... | -``` - -### Step 4.2: Generate Unified Insight Report - -**Output to file**: `output_fs/data_insight_report.md` - -Merge all charts and analytical insights into a single comprehensive report: - -```markdown -# 数据洞察报告 - -## 执行摘要 - -[3-5句话概述核心发现和最重要的建议] - ---- - -## 数据概况 - -### 基本信息 - -| 指标 | 数值 | -|------|------| -| 数据来源 | [文件名] | -| 总记录数 | X | -| 字段数 | Y | -| 时间范围 | YYYY-MM-DD ~ YYYY-MM-DD | -| 数据质量评分 | [高/中/低] | - -### 关键指标概览 - -| 指标名称 | 当前值 | 变化趋势 | 备注 | -|----------|--------|----------|------| -| 指标1 | XXX | ↑/↓/→ | 说明 | -| 指标2 | XXX | ↑/↓/→ | 说明 | -| ... | ... | ... | ... | - ---- - -## 核心洞察 - -### 洞察 1: [标题] - -**发现**: [关键发现描述] - -**数据支撑**: [具体数据] - -**业务影响**: [对业务的实际影响] - -**建议行动**: [可执行的具体建议] - -### 洞察 2: [标题] - -[同上结构] - ---- - -## 数据可视化分析 - -本节包含 [N] 个交互式图表,展示数据分析的关键发现。 - ---- - -### 图表 1: [标题] - -#### 可视化 - -```echarts -{ ... validated JSON ... } -``` - -#### 数据洞察 - -- **关键发现**: ... -- **趋势分析**: ... -- **业务建议**: ... - ---- - -### 图表 2: [标题] - -#### 可视化 - -```echarts -{ ... 
validated JSON ... } -``` - -#### 数据洞察 - -- **关键发现**: ... -- **趋势分析**: ... -- **业务建议**: ... - ---- - -... (continue for all charts) - ---- - -## 综合结论 - -### 核心洞察总结 - -1. [洞察1总结] -2. [洞察2总结] -3. [洞察3总结] - -### 行动建议优先级 - -| 优先级 | 建议行动 | 预期影响 | 实施难度 | -|--------|----------|----------|----------| -| 高 | [建议1] | [影响描述] | 低/中/高 | -| 中 | [建议2] | [影响描述] | 低/中/高 | -| 低 | [建议3] | [影响描述] | 低/中/高 | - -### 后续分析方向 - -- [可选的进一步分析建议1] -- [可选的进一步分析建议2] -- [可选的进一步分析建议3] - ---- - -## 方法论 - -### 分析方法 - -[说明使用的分析方法和技术] - -### 数据处理 - -[说明数据清洗和转换过程] - -### 局限性 - -[说明分析的局限性和假设] - ---- - -## 附录 - -### 数据质量报告 - -[数据质量详情] - -### 技术细节 - -[补充技术信息] -``` - ---- - -## ECharts JSON Configuration Rules - -**CRITICAL**: ECharts configurations MUST be valid JSON. They **MUST NOT** contain JavaScript functions. - -### Prohibited Patterns - -```javascript -// ❌ WRONG - Functions break JSON parsing -"tooltip": { - "formatter": function(params) { return params[0].name; } -} -``` - -### Correct Patterns - -```javascript -// ✅ CORRECT - Use string templates -"tooltip": { - "formatter": "{b}: {c}" -} - -// ✅ CORRECT - Or rely on default tooltip behavior -"tooltip": { "trigger": "axis" } -``` - ---- - -## Safe Chart Templates (Use These) - -### Template 1: Bar Chart - -```json -{ - "title": { "text": "图表标题" }, - "tooltip": { "trigger": "axis" }, - "legend": { "data": ["系列名称"] }, - "xAxis": { "type": "category", "data": ["类别1", "类别2", "类别3"] }, - "yAxis": { "type": "value" }, - "series": [{ "name": "系列名称", "type": "bar", "data": [100, 200, 300] }] -} -``` - -### Template 2: Line Chart - -```json -{ - "title": { "text": "图表标题" }, - "tooltip": { "trigger": "axis" }, - "legend": { "data": ["系列名称"] }, - "xAxis": { "type": "category", "data": ["一月", "二月", "三月"] }, - "yAxis": { "type": "value" }, - "series": [{ "name": "系列名称", "type": "line", "data": [100, 200, 300] }] -} -``` - -### Template 3: Pie Chart - -```json -{ - "title": { "text": "图表标题" }, - "tooltip": { "trigger": "item" }, - "legend": { "data": ["类别A", "类别B", "类别C"] }, - "series": [{ - "type": "pie", - "radius": "50%", - "data": [ - { "name": "类别A", "value": 100 }, - { "name": "类别B", "value": 200 }, - { "name": "类别C", "value": 300 } - ] - }] -} -``` - -### Template 4: Donut Chart - -```json -{ - "title": { "text": "图表标题" }, - "tooltip": { "trigger": "item" }, - "legend": { "data": ["类别A", "类别B"] }, - "series": [{ - "type": "pie", - "radius": ["40%", "70%"], - "data": [ - { "name": "类别A", "value": 100 }, - { "name": "类别B", "value": 200 } - ] - }] -} -``` - -### Template 5: Grouped Bar Chart - -```json -{ - "title": { "text": "图表标题" }, - "tooltip": { "trigger": "axis" }, - "legend": { "data": ["系列A", "系列B"] }, - "xAxis": { "type": "category", "data": ["类别1", "类别2", "类别3"] }, - "yAxis": { "type": "value" }, - "series": [ - { "name": "系列A", "type": "bar", "data": [100, 200, 300] }, - { "name": "系列B", "type": "bar", "data": [150, 250, 350] } - ] -} -``` - ---- - -## Layout Rules (Simplified) - -**CRITICAL PRINCIPLE: If unsure about layout, DO NOT configure it. Use ECharts defaults.** - -### Core Rules - -1. **Default is Best** - ECharts handles most layouts automatically. Only override when necessary. -2. **Minimal Configuration** - Only add layout properties you fully understand. -3. **Test Before Customizing** - If uncertain about positioning, omit the property entirely. 
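Concretely, a chart that follows these rules carries no layout keys at all. A minimal sketch (title and data are placeholders) that renders correctly with ECharts defaults:

```json
{
  "title": { "text": "图表标题" },
  "tooltip": { "trigger": "axis" },
  "xAxis": { "type": "category", "data": ["A", "B", "C"] },
  "yAxis": { "type": "value" },
  "series": [{ "type": "bar", "data": [10, 20, 30] }]
}
```

No `grid`, `left`, `right`, `top`, or `bottom` is set; ECharts positions everything automatically.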
- -### When to Add Layout (Only if certain) - -**Only add `grid` if:** -- Axis labels are being cut off AND you know the exact margin needed -- You need precise control over chart dimensions - -**Safe grid configuration (use sparingly):** -```json -{ "grid": { "containLabel": true } } -``` - -**DO NOT manually set `left`, `right`, `top`, `bottom` unless you have verified the values work.** - -### What NOT to Do - -❌ DO NOT add layout properties "just in case" -❌ DO NOT copy complex layout configurations from examples without understanding them -❌ DO NOT try to fix overlap issues by guessing margin values - ---- - -## Color Palette - -Professional color palette for charts: -```json -{"color": ["#5470c6", "#91cc75", "#fac858", "#ee6666", "#73c0de", "#3ba272", "#fc8452", "#9a60b4", "#ea7ccc"]} -``` - ---- - -## Data Embedding - -- Chart data arrays included directly in ECharts JSON configuration -- No external file references -- Use responsive width: 100% container -- Ensure data is properly escaped for JSON format - ---- - -## Best Practices - -1. **Chart Selection**: Choose chart type based on data characteristics - - Trends over time → Line chart - - Category comparison → Bar chart - - Part-to-whole → Pie/Donut chart - - Correlation → Scatter plot - - Distribution → Box plot or histogram - -2. **Chart Quantity Control** (CRITICAL): - - **Generate 4-7 charts maximum** per report - - Each chart must directly address the user's question or business objective - - Do NOT create charts for data exploration purposes - focus on key insights - - Avoid redundant visualizations showing similar information - - When in doubt, prioritize relevance over completeness - -3. **Sequential Generation** (MANDATORY): - - Generate ONE chart at a time - - Validate each chart with `scripts/validate_echarts.py` before proceeding - - Fix any issues immediately - - Merge only after all charts pass validation - -4. **Validation** (MANDATORY): - - **ALWAYS** run validation script after generating each chart - - Fix all ERROR level issues before proceeding - - Address WARNING level issues when possible - - Re-validate after fixes - -5. **Insight Quality**: - - Every insight should be actionable - - Support insights with specific data points - - Connect findings to business impact - - Provide clear recommendations - -6. **Accessibility**: Include clear titles, labels, and legends -7. **Responsive Design**: Use percentage-based dimensions -8. **Color Consistency**: Use consistent colors for same categories across charts -9. **Data Labels**: Show values when precision matters - ---- - -## Dependencies - -Use these skills for implementation: -- `skills(spreadsheet)` - For Excel/CSV file reading and manipulation -- `skills(echart)` - For ECharts visualization generation - -### Internal Capabilities - -- **abilities/data-analysis.md** - SQL-based data analysis with DuckDB -- **abilities/echarts-validation.md** - ECharts configuration validation rules and fixes -- **scripts/analyze.py** - Data analysis script for inspecting, querying, and summarizing data -- **scripts/validate_echarts.py** - ECharts configuration validation script - ---- - -## Example Usage - -``` -User: Analyze the sales data in data/sales.xlsx and create an insight report - -Agent: -1. Read abilities/data-analysis.md for data analysis instructions -2. Load Excel file using scripts/analyze.py --action inspect -3. Profile data structure and quality using --action summary -4. Clean data (handle missing values, duplicates, outliers) -5. 
Execute statistical analysis with SQL queries -6. Create Insight Plan with 4-7 charts -7. FOR EACH chart (one by one): - a. Generate chart_N.md with ECharts JSON - b. Run: python scripts/validate_echarts.py output_fs/charts/chart_N.md - c. Review validation report - d. Fix any ERROR level issues - e. Re-validate until ✅ 通过 - f. Confirm chart passes -8. Generate unified data_insight_report.md with: - - Executive summary - - Data profile - - Key insights with business impact - - All validated charts with insights - - Prioritized action recommendations - - Methodology and limitations -9. Save files to output_fs/ -``` - -``` -User: 分析 data/transactions.csv 文件,生成数据洞察报告 - -Agent: -1. 读取 abilities/data-analysis.md 了解数据分析方法 -2. 使用 scripts/analyze.py 加载 CSV 文件 -3. 分析数据结构和质量 -4. 数据清洗和预处理 -5. 执行探索性数据分析 -6. 制定洞察计划(4-7个图表) -7. 逐个生成图表: - a. 生成 chart_N.md - b. 运行校验: python scripts/validate_echarts.py output_fs/charts/chart_N.md - c. 查看校验报告 - d. 修复所有错误 - e. 重新校验直到通过 - f. 确认图表校验通过 -8. 生成完整的数据洞察报告 -9. 保存至 output_fs/data_insight_report.md -``` - ---- - -## Output Files Structure - -``` -output_fs/ -├── charts/ -│ ├── chart_01.md # Individual chart (validated) -│ ├── chart_02.md # Individual chart (validated) -│ ├── chart_03.md # Individual chart (validated) -│ └── ... -└── data_insight_report.md # Unified insight report with all charts and recommendations -``` - ---- - -## Changelog - -### v1.3.0 (Current) -- Enhanced data-analysis capability with comprehensive pandas support -- Added pandas analysis patterns: data loading, cleaning, transformation, grouping, time series -- Added statistical analysis examples: correlation, trend analysis, outlier detection, segmentation -- Added decision guidance for when to use SQL script vs pandas code -- Updated capability assessment to cover all data analysis scenarios - -### v1.2.0 -- Added echarts-validation capability with dedicated ability document -- Added scripts/validate_echarts.py for automated chart validation -- Integrated validation script into chart generation workflow -- Added validation rules reference table -- Enhanced workflow to mandate script-based validation after each chart - -### v1.1.0 -- Integrated data-analysis capability with dedicated ability document -- Added scripts/analyze.py for SQL-based data analysis -- Added abilities/ directory for modular capability documentation -- Added capability assessment section with decision flow -- Added TSV file format support -- Enhanced data profiling with DuckDB SQL engine - -### v1.0.0 -- Initial release: Renamed from `excel-echarts-report` to `data-insight-report` -- Added support for multiple data formats (Excel, CSV, TSV, ODS) -- Enhanced data profiling and quality assessment -- Added structured insight planning phase -- Improved report structure with executive summary and prioritized recommendations - ---- - -*Created for tabular data analysis with ECharts visualization* -*Version 1.3.0 - Enhanced pandas analysis with comprehensive statistical support* diff --git a/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/data-analysis.md b/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/data-analysis.md deleted file mode 100644 index 36aeb108..00000000 --- a/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/data-analysis.md +++ /dev/null @@ -1,646 +0,0 @@ -# Data Analysis Ability - -You are an expert data analyst with expertise in SQL, Python (pandas), and statistical analysis. 
- -## When to Apply - -Use this ability when: -- Writing SQL queries for data extraction -- Analyzing datasets with pandas -- Performing statistical analysis -- Creating data transformations -- Identifying data patterns and insights -- Data cleaning and preparation - -## Core Competencies - -### SQL Analysis (via Script) -- Schema inspection and data profiling -- Complex queries with JOINs, subqueries, CTEs -- Window functions and aggregations -- Statistical summaries -- Result export to CSV/JSON/Markdown - -### pandas Analysis (via Code) -- Data manipulation and transformation -- Grouping, filtering, pivoting -- Time series analysis -- Handling missing data -- Custom statistical analysis -- Data visualization preparation - -### Statistics -- Descriptive statistics -- Hypothesis testing -- Correlation analysis -- Trend analysis -- Outlier detection - ---- - -## Supported Data Formats - -| Format | Extensions | Description | -|--------|------------|-------------| -| Excel | `.xlsx`, `.xls`, `.xlsm` | Microsoft Excel workbooks | -| CSV | `.csv` | Comma-separated values | -| TSV | `.tsv`, `.tab` | Tab-separated values | -| JSON | `.json` | JSON files | -| Parquet | `.parquet` | Apache Parquet files | - ---- - -## Method 1: SQL Analysis (Script-Based) - -Use `scripts/analyze.py` for quick SQL-based analysis with DuckDB. - -### Script Location - -``` -scripts/analyze.py -``` - -### Actions - -| Action | Description | -|--------|-------------| -| `inspect` | View schema, columns, types, sample data | -| `query` | Execute SQL queries | -| `summary` | Generate statistical summaries | - -### Usage Examples - -#### Inspect File Structure - -```bash -python scripts/analyze.py \ - --files /path/to/data.xlsx \ - --action inspect -``` - -Returns: sheet names, columns, data types, row counts, sample data. - -#### Execute SQL Query - -```bash -python scripts/analyze.py \ - --files /path/to/data.xlsx \ - --action query \ - --sql "SELECT category, COUNT(*) as count, AVG(amount) as avg_amount FROM Sheet1 GROUP BY category ORDER BY count DESC" -``` - -#### Generate Statistical Summary - -```bash -python scripts/analyze.py \ - --files /path/to/data.xlsx \ - --action summary \ - --table Sheet1 -``` - -#### Export Results - -```bash -python scripts/analyze.py \ - --files /path/to/data.xlsx \ - --action query \ - --sql "SELECT * FROM Sheet1 WHERE amount > 1000" \ - --output-file /path/to/output/results.csv -``` - -### SQL Analysis Patterns - -#### Basic Exploration - -```sql --- Row count -SELECT COUNT(*) FROM Sheet1 - --- Distinct values -SELECT DISTINCT category FROM Sheet1 - --- Value distribution -SELECT category, COUNT(*) as cnt -FROM Sheet1 -GROUP BY category -ORDER BY cnt DESC - --- Date range -SELECT MIN(date_col), MAX(date_col) FROM Sheet1 -``` - -#### Aggregation & Grouping - -```sql --- Revenue by category and month -SELECT category, - DATE_TRUNC('month', order_date) as month, - SUM(revenue) as total_revenue -FROM Sales -GROUP BY category, month -ORDER BY month, total_revenue DESC - --- Top 10 customers -SELECT customer_name, SUM(amount) as total_spend -FROM Orders -GROUP BY customer_name -ORDER BY total_spend DESC -LIMIT 10 -``` - -#### Window Functions - -```sql --- Running total and rank -SELECT order_date, amount, - SUM(amount) OVER (ORDER BY order_date) as running_total, - RANK() OVER (ORDER BY amount DESC) as amount_rank -FROM Sales -``` - ---- - -## Method 2: pandas Analysis (Code-Based) - -Use pandas for flexible, programmatic data analysis with Python code. 
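To make the trade-off concrete before the comparison table, here is the pandas equivalent of the "revenue by category and month" SQL pattern shown above (a sketch: the file name `sales.xlsx` is hypothetical, and the `Sales` sheet with `category`/`order_date`/`revenue` columns is taken from that SQL example, not from a real dataset):

```python
import pandas as pd

# pandas equivalent of:
#   SELECT category, DATE_TRUNC('month', order_date) AS month, SUM(revenue)
#   FROM Sales GROUP BY category, month
df = pd.read_excel("sales.xlsx", sheet_name="Sales")
monthly = (
    df.assign(month=pd.to_datetime(df["order_date"]).dt.to_period("M"))
      .groupby(["category", "month"], as_index=False)["revenue"]
      .sum()
      .sort_values(["month", "revenue"], ascending=[True, False])
)
print(monthly.head())
```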
- -### When to Use pandas vs SQL - -| Use pandas when... | Use SQL when... | -|-------------------|-----------------| -| Need complex data transformations | Quick aggregations and filtering | -| Custom statistical analysis | Standard summaries | -| Data cleaning with conditional logic | Simple joins and grouping | -| Time series manipulation | Large datasets (DuckDB is faster) | -| Iterative exploration | One-time queries | -| Need to chain multiple operations | Export to file directly | - -### Loading Data - -```python -import pandas as pd - -# Excel -df = pd.read_excel('data.xlsx', sheet_name='Sheet1') -df = pd.read_excel('data.xlsx', sheet_name=None) # All sheets as dict - -# CSV -df = pd.read_csv('data.csv') -df = pd.read_csv('data.csv', encoding='utf-8', parse_dates=['date_col']) - -# TSV -df = pd.read_csv('data.tsv', sep='\t') - -# JSON -df = pd.read_json('data.json') -``` - -### Data Inspection - -```python -# Basic info -df.info() -df.shape -df.columns.tolist() -df.dtypes - -# Preview -df.head(10) -df.tail(5) -df.sample(5) - -# Statistical summary -df.describe() -df.describe(include='all') - -# Missing values -df.isnull().sum() -df.isnull().mean() * 100 # Percentage - -# Unique values -df['column'].nunique() -df['column'].value_counts() -``` - -### Data Cleaning - -```python -# Handle missing values -df.dropna() # Drop rows with any missing -df.dropna(subset=['col1', 'col2']) # Drop if specific columns missing -df.fillna(0) # Fill with value -df.fillna({'col1': 0, 'col2': 'unknown'}) # Fill per column -df['col'].fillna(df['col'].mean()) # Fill with mean - -# Remove duplicates -df.drop_duplicates() -df.drop_duplicates(subset=['id']) -df.duplicated().sum() # Count duplicates - -# Type conversion -df['date'] = pd.to_datetime(df['date']) -df['amount'] = pd.to_numeric(df['amount'], errors='coerce') -df['category'] = df['category'].astype('category') - -# String cleaning -df['name'] = df['name'].str.strip() -df['name'] = df['name'].str.lower() -df['name'] = df['name'].str.replace(r'\s+', ' ', regex=True) - -# Outlier handling -q1 = df['amount'].quantile(0.25) -q3 = df['amount'].quantile(0.75) -iqr = q3 - q1 -df_clean = df[(df['amount'] >= q1 - 1.5*iqr) & (df['amount'] <= q3 + 1.5*iqr)] -``` - -### Data Transformation - -```python -# Filtering -df[df['amount'] > 100] -df[(df['amount'] > 100) & (df['category'] == 'A')] -df.query('amount > 100 and category == "A"') -df[~df['category'].isin(['A', 'B'])] # Exclude - -# Sorting -df.sort_values('amount', ascending=False) -df.sort_values(['category', 'amount'], ascending=[True, False]) - -# Column operations -df['new_col'] = df['col1'] + df['col2'] -df['ratio'] = df['amount'] / df['total'] -df['log_amount'] = np.log(df['amount']) - -# Rename columns -df.rename(columns={'old_name': 'new_name'}) -df.columns = ['col1', 'col2', 'col3'] # All at once - -# Select/reorder columns -df[['col1', 'col2', 'col3']] -df.drop(columns=['col1', 'col2']) -``` - -### Grouping & Aggregation - -```python -# Basic groupby -df.groupby('category')['amount'].sum() -df.groupby('category')['amount'].agg(['sum', 'mean', 'count']) - -# Multiple columns -df.groupby(['category', 'region'])['amount'].sum() - -# Multiple aggregations -df.groupby('category').agg({ - 'amount': ['sum', 'mean', 'std'], - 'quantity': ['sum', 'count'], - 'date': ['min', 'max'] -}) - -# Custom aggregations -df.groupby('category')['amount'].agg( - total='sum', - average='mean', - count='count', - range_=lambda x: x.max() - x.min() -) - -# Transform (preserve shape) -df['category_avg'] = 
df.groupby('category')['amount'].transform('mean') - -# Filter groups -df.groupby('category').filter(lambda x: x['amount'].sum() > 1000) -``` - -### Pivot Tables & Cross-tabs - -```python -# Pivot table -df.pivot_table( - values='amount', - index='category', - columns='region', - aggfunc='sum', - fill_value=0 -) - -# Multiple aggregations -df.pivot_table( - values='amount', - index='category', - columns='region', - aggfunc=['sum', 'mean', 'count'] -) - -# Cross-tabulation -pd.crosstab(df['category'], df['region']) -pd.crosstab(df['category'], df['region'], normalize='index') # Row percentages -``` - -### Time Series Analysis - -```python -# Convert to datetime -df['date'] = pd.to_datetime(df['date']) - -# Set datetime index -df = df.set_index('date') - -# Resample (time-based grouping) -df.resample('M')['amount'].sum() # Monthly -df.resample('W')['amount'].mean() # Weekly -df.resample('Q')['amount'].sum() # Quarterly - -# Rolling windows -df['rolling_avg'] = df['amount'].rolling(window=7).mean() -df['rolling_sum'] = df['amount'].rolling(window=30).sum() - -# Date components -df['year'] = df['date'].dt.year -df['month'] = df['date'].dt.month -df['day'] = df['date'].dt.day -df['weekday'] = df['date'].dt.day_name() -df['quarter'] = df['date'].dt.quarter - -# Shift (lag/lead) -df['prev_amount'] = df['amount'].shift(1) -df['pct_change'] = df['amount'].pct_change() -``` - -### Statistical Analysis - -```python -import numpy as np -from scipy import stats - -# Descriptive statistics -df['amount'].describe() -df['amount'].mean() -df['amount'].median() -df['amount'].std() -df['amount'].var() -df['amount'].quantile([0.25, 0.5, 0.75]) - -# Correlation -df.corr() # Correlation matrix -df.corr()['target'] # Correlation with target -df[['col1', 'col2', 'col3']].corr() - -# Covariance -df.cov() - -# Hypothesis testing -stats.ttest_ind(df[df['group'] == 'A']['amount'], - df[df['group'] == 'B']['amount']) - -stats.chi2_contingency(pd.crosstab(df['cat1'], df['cat2'])) - -# Normality test -stats.normaltest(df['amount']) - -# Correlation test -stats.pearsonr(df['col1'], df['col2']) -stats.spearmanr(df['col1'], df['col2']) -``` - -### Data Merging - -```python -# Concatenate -pd.concat([df1, df2], axis=0) # Stack vertically -pd.concat([df1, df2], axis=1) # Side by side - -# Merge (SQL-style joins) -pd.merge(df1, df2, on='key') # Inner join -pd.merge(df1, df2, on='key', how='left') # Left join -pd.merge(df1, df2, on='key', how='right') # Right join -pd.merge(df1, df2, on='key', how='outer') # Full outer join - -# Merge on different column names -pd.merge(df1, df2, left_on='id', right_on='customer_id') - -# Join on index -df1.join(df2, on='key') -``` - -### Exporting Data - -```python -# To CSV -df.to_csv('output.csv', index=False) -df.to_csv('output.csv', index=False, encoding='utf-8-sig') - -# To Excel -df.to_excel('output.xlsx', index=False, sheet_name='Sheet1') - -# To JSON -df.to_json('output.json', orient='records', indent=2) - -# To Markdown table -print(df.to_markdown(index=False)) - -# To HTML -df.to_html('output.html', index=False) -``` - ---- - -## Method 3: Statistical Analysis Examples - -### Correlation Analysis - -```python -import pandas as pd -import numpy as np - -# Load data -df = pd.read_excel('data.xlsx') - -# Correlation matrix -corr_matrix = df.select_dtypes(include=[np.number]).corr() - -# Find highly correlated pairs -high_corr = [] -for i in range(len(corr_matrix.columns)): - for j in range(i+1, len(corr_matrix.columns)): - if abs(corr_matrix.iloc[i, j]) > 0.7: - high_corr.append({ - 
                'var1': corr_matrix.columns[i],
                'var2': corr_matrix.columns[j],
                'correlation': corr_matrix.iloc[i, j]
            })

high_corr_df = pd.DataFrame(high_corr)
print(high_corr_df)
```

### Trend Analysis

```python
import pandas as pd
import numpy as np
from scipy import stats

df = pd.read_excel('sales.xlsx')
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')

# Monthly trend
monthly = df.resample('M', on='date')['amount'].sum().reset_index()

# Linear regression for trend
x = np.arange(len(monthly))
y = monthly['amount'].values
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

print(f"Trend slope: {slope:.2f} per month")
print(f"R-squared: {r_value**2:.3f}")
print(f"P-value: {p_value:.4f}")

# Trend direction
if slope > 0 and p_value < 0.05:
    print("Significant upward trend")
elif slope < 0 and p_value < 0.05:
    print("Significant downward trend")
else:
    print("No significant trend")
```

### Outlier Detection

```python
import pandas as pd
import numpy as np

df = pd.read_excel('data.xlsx')

# IQR method
def detect_outliers_iqr(series, multiplier=1.5):
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - multiplier * iqr
    upper = q3 + multiplier * iqr
    return (series < lower) | (series > upper)

# Z-score method
def detect_outliers_zscore(series, threshold=3):
    z_scores = np.abs((series - series.mean()) / series.std())
    return z_scores > threshold

# Apply
for col in df.select_dtypes(include=[np.number]).columns:
    outliers_iqr = detect_outliers_iqr(df[col])
    outliers_zscore = detect_outliers_zscore(df[col])
    print(f"{col}: {outliers_iqr.sum()} outliers (IQR), {outliers_zscore.sum()} outliers (Z-score)")
```

### Segmentation Analysis

```python
import pandas as pd

df = pd.read_excel('customers.xlsx')

# RFM-like segmentation
df['recency_score'] = pd.qcut(df['days_since_last_purchase'], 5, labels=[5,4,3,2,1])
df['frequency_score'] = pd.qcut(df['purchase_count'].rank(method='first'), 5, labels=[1,2,3,4,5])
df['monetary_score'] = pd.qcut(df['total_spent'].rank(method='first'), 5, labels=[1,2,3,4,5])

df['rfm_score'] = df['recency_score'].astype(str) + df['frequency_score'].astype(str) + df['monetary_score'].astype(str)

# Segment summary
segments = df.groupby('rfm_score').agg({
    'customer_id': 'count',
    'total_spent': ['mean', 'sum'],
    'purchase_count': 'mean'
}).round(2)

print(segments)
```

---

## Analysis Workflow Recommendation

### For Quick Analysis (Use SQL Script)

1. **Inspect**: `python scripts/analyze.py --files data.xlsx --action inspect`
2. **Summary**: `python scripts/analyze.py --files data.xlsx --action summary --table Sheet1`
3. **Query**: Write SQL for aggregations and filtering
4. **Export**: Use `--output-file` for results

### For Complex Analysis (Use pandas)

1. **Load**: `pd.read_excel()` or `pd.read_csv()`
2. **Clean**: Handle missing values, duplicates, type conversions
3. **Explore**: `describe()`, `info()`, `value_counts()`
4. **Transform**: Group, pivot, merge, calculate new columns
5. **Analyze**: Statistical tests, correlation, trends
6. **Export**: `to_csv()`, `to_excel()`, `to_markdown()`

### When to Combine Both

```
1. Use SQL script for initial inspection and quick queries
2. Export subset of data for deeper pandas analysis
3. Use pandas for custom transformations and statistics
4.
Use SQL for final aggregations if working with large data -``` - ---- - -## Parameters Reference - -### SQL Script Parameters - -| Parameter | Required | Description | -|-----------|----------|-------------| -| `--files` | Yes | Space-separated paths to data files | -| `--action` | Yes | One of: `inspect`, `query`, `summary` | -| `--sql` | For `query` | SQL query to execute | -| `--table` | For `summary` | Table/sheet name to summarize | -| `--output-file` | No | Path to export results (CSV/JSON/MD) | - -### Common pandas Parameters - -```python -# read_excel -pd.read_excel(file, sheet_name=0, header=0, usecols=None, dtype=None, parse_dates=None) - -# read_csv -pd.read_csv(file, sep=',', header=0, encoding=None, parse_dates=None, chunksize=None) - -# to_csv -df.to_csv(file, index=True, encoding='utf-8', sep=',') - -# to_excel -df.to_excel(file, sheet_name='Sheet1', index=True) -``` - ---- - -## Notes - -- DuckDB (SQL script) is faster for large datasets (100MB+) -- pandas is more flexible for complex transformations -- Use SQL for joins and simple aggregations -- Use pandas for custom logic and iterative analysis -- Cache is automatic for SQL script — repeated queries are instant - ---- - -## Integration with Data Insight Report - -When using this ability within the `data-insight-report` skill: - -1. Use SQL script for initial data profiling (`inspect`, `summary`) -2. Use pandas for complex transformations and statistical analysis -3. Export processed data to CSV for chart generation -4. Document findings for insight planning phase - -The analysis results feed directly into: -- Data quality assessment -- Insight planning -- Chart data preparation -- Statistical insights for the report diff --git a/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/echarts-validation.md b/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/echarts-validation.md deleted file mode 100644 index fc6bd033..00000000 --- a/libs/hexagent_demo/backend/skills/examples/data-insight-report/abilities/echarts-validation.md +++ /dev/null @@ -1,285 +0,0 @@ -# ECharts Validation Ability - -## Overview - -This ability provides automated validation for ECharts JSON configurations to ensure they are syntactically correct, structurally valid, and follow best practices before embedding in reports. 
- -## Script Location - -``` -scripts/validate_echarts.py -``` - -## Core Capabilities - -- **JSON Syntax Validation**: Detects common JSON errors (single quotes, trailing commas, functions, undefined) -- **ECharts Structure Validation**: Validates required fields and chart type configurations -- **Data Integrity Validation**: Checks data length consistency, NaN/Infinity values, type correctness -- **Layout Validation**: Warns about complex layouts and performance issues - -## Usage - -### Basic Usage - -```bash -# Validate a JSON file -python scripts/validate_echarts.py chart_config.json - -# Validate a Markdown file with echarts code block -python scripts/validate_echarts.py output_fs/charts/chart_01.md - -# Validate JSON from stdin -echo '{"title":{"text":"test"},"series":[{"type":"bar","data":[1,2,3]}]}' | python scripts/validate_echarts.py - - -# Output as JSON format -python scripts/validate_echarts.py chart_config.json --format json - -# Strict mode (exit error on warnings too) -python scripts/validate_echarts.py chart_config.json --strict -``` - -### Parameters - -| Parameter | Description | -|-----------|-------------| -| `file` | Path to file containing ECharts config (JSON or Markdown with ```echarts block), or `-` for stdin | -| `--format, -f` | Output format: `markdown` (default) or `json` | -| `--strict, -s` | Exit with error code on any validation failure (including warnings) | - -### Exit Codes - -| Code | Meaning | -|------|---------| -| 0 | All validations passed | -| 1 | One or more ERROR level failures | -| 2 | All passed but WARNING level failures (only with `--strict`) | - -## Validation Categories - -### 1. JSON Syntax Validation (JSON语法校验) - -| Check | Level | Description | -|-------|-------|-------------| -| JSON解析 | ERROR | Valid JSON syntax | -| 引号检查 | ERROR | Must use double quotes, no single quotes | -| 尾逗号检查 | ERROR | No trailing commas in arrays/objects | -| 函数检查 | ERROR | No JavaScript functions allowed | -| undefined检查 | ERROR | No `undefined` values | - -### 2. ECharts Structure Validation (ECharts结构校验) - -| Check | Level | Description | -|-------|-------|-------------| -| title字段 | WARNING | Recommended to have title | -| tooltip字段 | WARNING | Recommended to have tooltip | -| series字段 | ERROR | Required, must be non-empty array | -| series[].type | ERROR | Must be valid ECharts chart type | -| series[].data | ERROR | Must exist and be array | -| xAxis/yAxis配置 | ERROR | Required for bar/line/scatter charts | -| 饼图数据格式 | WARNING | Pie data should have name/value | - -### 3. Data Integrity Validation (数据完整性校验) - -| Check | Level | Description | -|-------|-------|-------------| -| 数据长度匹配 | ERROR | xAxis data length must match series data length | -| NaN检查 | ERROR | No NaN values in data | -| Infinity检查 | ERROR | No Infinity values in data | -| 数值类型检查 | WARNING | Numeric values should be numbers, not strings | - -### 4. 
Layout Validation (布局校验) - -| Check | Level | Description | -|-------|-------|-------------| -| grid配置 | WARNING | Avoid complex manual positioning | -| 标题长度 | WARNING | Keep under 30 characters | -| 图例项数 | WARNING | Keep under 10 items | -| 数据量 | WARNING | Large datasets may affect performance | - -## Output Formats - -### Markdown Output (default) - -```markdown -## 校验报告 - 销售趋势图 - -### JSON语法校验 -- ✅ JSON解析: 通过 -- ✅ 引号检查: 通过 -- ✅ 尾逗号检查: 通过 -- ✅ 函数检查: 无JavaScript函数 - -### ECharts结构校验 -- ✅ title字段: 通过 -- ✅ tooltip字段: 通过 -- ✅ series字段: 通过 -- ✅ series[0].type: 'bar' - 有效 -- ✅ series[0].data: 12 个数据点 - -### 数据完整性校验 -- ✅ series[0] 数据长度匹配: 通过 -- ✅ series[0] NaN检查: 通过 -- ✅ series[0] Infinity检查: 通过 -- ✅ series[0] 数值类型检查: 所有数值为number类型 - -### 布局校验 -- ✅ grid配置: 使用默认布局 -- ✅ 标题长度: 5字符 - 合适 -- ✅ 图例项数: 3项 - 合理 - -### 校验结果 -✅ **通过** -``` - -### JSON Output (`--format json`) - -```json -{ - "chart_title": "销售趋势图", - "chart_type": "bar", - "passed": true, - "error_count": 0, - "warning_count": 0, - "results": [ - { - "level": "INFO", - "category": "JSON语法校验", - "check": "JSON解析", - "status": true, - "message": "" - } - ] -} -``` - -## Integration with Data Insight Report Workflow - -### When to Validate - -**CRITICAL: Validate each chart immediately after generating it, before moving to the next chart.** - -``` -Generate Chart 1 → Validate Chart 1 → Fix Issues → Confirm Pass - ↓ - Generate Chart 2 → ... -``` - -### Validation Workflow - -1. **Generate chart file** (e.g., `output_fs/charts/chart_01.md`) - -2. **Run validation**: - ```bash - python scripts/validate_echarts.py output_fs/charts/chart_01.md - ``` - -3. **Check result**: - - If `✅ 通过`: Proceed to next chart - - If `❌ 失败`: Review errors, fix the chart, re-validate - -4. **Fix common issues**: - - Single quotes → Double quotes - - Trailing commas → Remove - - JavaScript functions → Use string templates - - String numbers → Convert to actual numbers - - Data length mismatch → Align xAxis and series data - -## Programmatic Usage - -```python -from scripts.validate_echarts import validate_echarts_config, validate_file - -# Validate from string -config = ''' -{ - "title": { "text": "Sales" }, - "tooltip": { "trigger": "axis" }, - "xAxis": { "type": "category", "data": ["A", "B", "C"] }, - "yAxis": { "type": "value" }, - "series": [{ "type": "bar", "data": [100, 200, 300] }] -} -''' -report = validate_echarts_config(config) -print(report.to_markdown()) -print(f"Passed: {report.passed}") - -# Validate from file -report = validate_file("output_fs/charts/chart_01.md") -if not report.passed: - for error in report.errors: - print(f"ERROR: {error.check} - {error.message}") -``` - -## Common Error Fixes - -### Fix 1: Single Quotes - -```javascript -// ❌ WRONG -{ 'name': 'Sales', 'type': 'bar' } - -// ✅ CORRECT -{ "name": "Sales", "type": "bar" } -``` - -### Fix 2: Trailing Commas - -```javascript -// ❌ WRONG -"data": [1, 2, 3,], -"series": [{ "name": "A" },] - -// ✅ CORRECT -"data": [1, 2, 3], -"series": [{ "name": "A" }] -``` - -### Fix 3: JavaScript Functions - -```javascript -// ❌ WRONG -"tooltip": { "formatter": function(params) { return params[0].name; } } - -// ✅ CORRECT -"tooltip": { "formatter": "{b}: {c}" } -``` - -### Fix 4: String Numbers - -```javascript -// ❌ WRONG -"data": ["100", "200", "300"] - -// ✅ CORRECT -"data": [100, 200, 300] -``` - -### Fix 5: Data Length Mismatch - -```javascript -// ❌ WRONG: 3 categories but 4 data points -"xAxis": { "data": ["A", "B", "C"] }, -"series": [{ "data": [10, 20, 30, 40] }] - -// ✅ CORRECT: Align data 
-"xAxis": { "data": ["A", "B", "C", "D"] }, -"series": [{ "data": [10, 20, 30, 40] }] -``` - -### Fix 6: Complex Layout - -```javascript -// ❌ WRONG: Overly complex manual positioning -"grid": { "left": "3%", "right": "4%", "bottom": "3%", "top": "15%" } - -// ✅ CORRECT: Let ECharts handle it -"grid": { "containLabel": true } -// Or remove grid entirely -``` - -## Notes - -- The validator extracts ECharts config from Markdown files by looking for ```echarts code blocks -- Validation levels: ERROR (must fix), WARNING (should fix), INFO (passed) -- Use `--strict` flag to treat warnings as errors in CI/CD pipelines -- The script returns non-zero exit codes for use in automated workflows diff --git a/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/analyze.py b/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/analyze.py deleted file mode 100644 index 50636d97..00000000 --- a/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/analyze.py +++ /dev/null @@ -1,569 +0,0 @@ -""" -Data Analysis Script using DuckDB. - -Analyzes Excel (.xlsx/.xls) and CSV files using DuckDB's in-process SQL engine. -Supports schema inspection, SQL queries, statistical summaries, and result export. -""" - -import argparse -import hashlib -import json -import logging -import os -import re -import sys -import tempfile - -logging.basicConfig(level=logging.INFO, format="%(message)s") -logger = logging.getLogger(__name__) - -try: - import duckdb -except ImportError: - logger.error("duckdb is not installed. Installing...") - os.system(f"{sys.executable} -m pip install duckdb openpyxl -q") - import duckdb - -try: - import openpyxl # noqa: F401 -except ImportError: - os.system(f"{sys.executable} -m pip install openpyxl -q") - -# Cache directory for persistent DuckDB databases -CACHE_DIR = os.path.join(tempfile.gettempdir(), ".data-analysis-cache") -TABLE_MAP_SUFFIX = ".table_map.json" - - -def compute_files_hash(files: list[str]) -> str: - """Compute a combined SHA256 hash of all input files for cache key.""" - hasher = hashlib.sha256() - for file_path in sorted(files): - try: - with open(file_path, "rb") as f: - while chunk := f.read(8192): - hasher.update(chunk) - except OSError: - # Include path as fallback if file can't be read - hasher.update(file_path.encode()) - return hasher.hexdigest() - - -def get_cache_db_path(files_hash: str) -> str: - """Get the path to the cached DuckDB database file.""" - os.makedirs(CACHE_DIR, exist_ok=True) - return os.path.join(CACHE_DIR, f"{files_hash}.duckdb") - - -def get_table_map_path(files_hash: str) -> str: - """Get the path to the cached table map JSON file.""" - return os.path.join(CACHE_DIR, f"{files_hash}{TABLE_MAP_SUFFIX}") - - -def save_table_map(files_hash: str, table_map: dict[str, str]) -> None: - """Save table map to a JSON file alongside the cached DB.""" - path = get_table_map_path(files_hash) - with open(path, "w", encoding="utf-8") as f: - json.dump(table_map, f, ensure_ascii=False) - - -def load_table_map(files_hash: str) -> dict[str, str] | None: - """Load table map from cache. 
Returns None if not found.""" - path = get_table_map_path(files_hash) - if not os.path.exists(path): - return None - try: - with open(path, "r", encoding="utf-8") as f: - return json.load(f) - except Exception: - return None - - -def sanitize_table_name(name: str) -> str: - """Sanitize a sheet/file name into a valid SQL table name.""" - sanitized = re.sub(r"[^\w]", "_", name) - if sanitized and sanitized[0].isdigit(): - sanitized = f"t_{sanitized}" - return sanitized - - -def load_files(con: duckdb.DuckDBPyConnection, files: list[str]) -> dict[str, str]: - """ - Load Excel/CSV files into DuckDB tables. - - Returns a mapping of original_name -> sanitized_table_name. - """ - con.execute("INSTALL spatial; LOAD spatial;") - table_map: dict[str, str] = {} - - for file_path in files: - if not os.path.exists(file_path): - logger.error(f"File not found: {file_path}") - continue - - ext = os.path.splitext(file_path)[1].lower() - - if ext in (".xlsx", ".xls"): - _load_excel(con, file_path, table_map) - elif ext in (".csv", ".tsv", ".tab"): - _load_csv(con, file_path, table_map) - else: - logger.warning(f"Unsupported file format: {ext} ({file_path})") - - return table_map - - -def _load_excel( - con: duckdb.DuckDBPyConnection, file_path: str, table_map: dict[str, str] -) -> None: - """Load all sheets from an Excel file into DuckDB tables.""" - import openpyxl - - wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True) - sheet_names = wb.sheetnames - wb.close() - - for sheet_name in sheet_names: - table_name = sanitize_table_name(sheet_name) - - # Handle duplicate table names - original_table_name = table_name - counter = 1 - while table_name in table_map.values(): - table_name = f"{original_table_name}_{counter}" - counter += 1 - - try: - con.execute( - f""" - CREATE TABLE "{table_name}" AS - SELECT * FROM st_read( - '{file_path}', - layer = '{sheet_name}', - open_options = ['HEADERS=FORCE', 'FIELD_TYPES=AUTO'] - ) - """ - ) - table_map[sheet_name] = table_name - row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[ - 0 - ] - logger.info( - f" Loaded sheet '{sheet_name}' -> table '{table_name}' ({row_count} rows)" - ) - except Exception as e: - logger.warning(f" Failed to load sheet '{sheet_name}': {e}") - - -def _load_csv( - con: duckdb.DuckDBPyConnection, file_path: str, table_map: dict[str, str] -) -> None: - """Load a CSV/TSV file into a DuckDB table.""" - base_name = os.path.splitext(os.path.basename(file_path))[0] - table_name = sanitize_table_name(base_name) - - # Handle duplicate table names - original_table_name = table_name - counter = 1 - while table_name in table_map.values(): - table_name = f"{original_table_name}_{counter}" - counter += 1 - - # Detect delimiter - ext = os.path.splitext(file_path)[1].lower() - delimiter = "\t" if ext in (".tsv", ".tab") else "," - - try: - con.execute( - f""" - CREATE TABLE "{table_name}" AS - SELECT * FROM read_csv_auto('{file_path}', delim='{delimiter}') - """ - ) - table_map[base_name] = table_name - row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[0] - logger.info( - f" Loaded CSV '{base_name}' -> table '{table_name}' ({row_count} rows)" - ) - except Exception as e: - logger.warning(f" Failed to load CSV '{base_name}': {e}") - - -def action_inspect(con: duckdb.DuckDBPyConnection, table_map: dict[str, str]) -> str: - """Inspect the schema of all loaded tables.""" - output_parts = [] - - for original_name, table_name in table_map.items(): - output_parts.append(f"\n{'=' * 60}") - 
output_parts.append(f'Table: {original_name} (SQL name: "{table_name}")') - output_parts.append(f"{'=' * 60}") - - # Get row count - row_count = con.execute(f'SELECT COUNT(*) FROM "{table_name}"').fetchone()[0] - output_parts.append(f"Rows: {row_count}") - - # Get column info - columns = con.execute(f'DESCRIBE "{table_name}"').fetchall() - output_parts.append(f"\nColumns ({len(columns)}):") - output_parts.append(f"{'Name':<30} {'Type':<15} {'Nullable'}") - output_parts.append(f"{'-' * 30} {'-' * 15} {'-' * 8}") - for col in columns: - col_name, col_type, nullable = col[0], col[1], col[2] - output_parts.append(f"{col_name:<30} {col_type:<15} {nullable}") - - # Get non-null counts per column - col_names = [col[0] for col in columns] - non_null_parts = [] - for c in col_names: - non_null_parts.append(f'COUNT("{c}") as "{c}"') - non_null_sql = f'SELECT {", ".join(non_null_parts)} FROM "{table_name}"' - try: - non_null_counts = con.execute(non_null_sql).fetchone() - output_parts.append(f"\nNon-null counts:") - for i, c in enumerate(col_names): - output_parts.append(f" {c}: {non_null_counts[i]} / {row_count}") - except Exception: - pass - - # Sample data (first 5 rows) - output_parts.append(f"\nSample data (first 5 rows):") - try: - sample = con.execute(f'SELECT * FROM "{table_name}" LIMIT 5').fetchdf() - output_parts.append(sample.to_string(index=False)) - except Exception: - sample = con.execute(f'SELECT * FROM "{table_name}" LIMIT 5').fetchall() - header = [col[0] for col in columns] - output_parts.append(" " + " | ".join(header)) - for row in sample: - output_parts.append(" " + " | ".join(str(v) for v in row)) - - result = "\n".join(output_parts) - print(result) - return result - - -def action_query( - con: duckdb.DuckDBPyConnection, - sql: str, - table_map: dict[str, str], - output_file: str | None = None, -) -> str: - """Execute a SQL query and return/export results.""" - # Replace original sheet/file names with sanitized table names in SQL - modified_sql = sql - for original_name, table_name in sorted( - table_map.items(), key=lambda x: len(x[0]), reverse=True - ): - if original_name != table_name: - # Replace occurrences not already quoted - modified_sql = re.sub( - rf"\b{re.escape(original_name)}\b", - f'"{table_name}"', - modified_sql, - ) - - try: - result = con.execute(modified_sql) - columns = [desc[0] for desc in result.description] - rows = result.fetchall() - except Exception as e: - error_msg = f"SQL Error: {e}\n\nAvailable tables:\n" - for orig, tbl in table_map.items(): - cols = con.execute(f'DESCRIBE "{tbl}"').fetchall() - col_names = [c[0] for c in cols] - error_msg += f' "{tbl}" ({orig}): {", ".join(col_names)}\n' - print(error_msg) - return error_msg - - # Format output - if output_file: - return _export_results(columns, rows, output_file) - - # Print as table - return _format_table(columns, rows) - - -def _format_table(columns: list[str], rows: list[tuple]) -> str: - """Format query results as a readable table.""" - if not rows: - msg = "Query returned 0 rows." 
- print(msg) - return msg - - # Calculate column widths - col_widths = [len(str(c)) for c in columns] - for row in rows: - for i, val in enumerate(row): - col_widths[i] = max(col_widths[i], len(str(val))) - - # Cap column width - max_width = 40 - col_widths = [min(w, max_width) for w in col_widths] - - # Build table - parts = [] - header = " | ".join(str(c).ljust(col_widths[i]) for i, c in enumerate(columns)) - separator = "-+-".join("-" * col_widths[i] for i in range(len(columns))) - parts.append(header) - parts.append(separator) - for row in rows: - row_str = " | ".join( - str(v)[:max_width].ljust(col_widths[i]) for i, v in enumerate(row) - ) - parts.append(row_str) - - parts.append(f"\n({len(rows)} rows)") - result = "\n".join(parts) - print(result) - return result - - -def _export_results(columns: list[str], rows: list[tuple], output_file: str) -> str: - """Export query results to a file (CSV, JSON, or Markdown).""" - os.makedirs(os.path.dirname(output_file), exist_ok=True) - ext = os.path.splitext(output_file)[1].lower() - - if ext == ".csv": - import csv - - with open(output_file, "w", newline="", encoding="utf-8") as f: - writer = csv.writer(f) - writer.writerow(columns) - writer.writerows(rows) - - elif ext == ".json": - records = [] - for row in rows: - record = {} - for i, col in enumerate(columns): - val = row[i] - # Handle non-JSON-serializable types - if hasattr(val, "isoformat"): - val = val.isoformat() - elif isinstance(val, (bytes, bytearray)): - val = val.hex() - record[col] = val - records.append(record) - with open(output_file, "w", encoding="utf-8") as f: - json.dump(records, f, indent=2, ensure_ascii=False, default=str) - - elif ext == ".md": - with open(output_file, "w", encoding="utf-8") as f: - # Header - f.write("| " + " | ".join(columns) + " |\n") - f.write("| " + " | ".join("---" for _ in columns) + " |\n") - # Rows - for row in rows: - f.write( - "| " + " | ".join(str(v).replace("|", "\\|") for v in row) + " |\n" - ) - else: - msg = f"Unsupported output format: {ext}. Use .csv, .json, or .md" - print(msg) - return msg - - msg = f"Results exported to {output_file} ({len(rows)} rows)" - print(msg) - return msg - - -def action_summary( - con: duckdb.DuckDBPyConnection, - table_name: str, - table_map: dict[str, str], -) -> str: - """Generate statistical summary for a table.""" - # Resolve table name - resolved = table_map.get(table_name, table_name) - - try: - columns = con.execute(f'DESCRIBE "{resolved}"').fetchall() - except Exception: - available = ", ".join(f'"{t}" ({o})' for o, t in table_map.items()) - msg = f"Table '{table_name}' not found. 
Available tables: {available}" - print(msg) - return msg - - row_count = con.execute(f'SELECT COUNT(*) FROM "{resolved}"').fetchone()[0] - - output_parts = [] - output_parts.append(f"\nStatistical Summary: {table_name}") - output_parts.append(f"Total rows: {row_count}") - output_parts.append(f"{'=' * 70}") - - numeric_types = { - "BIGINT", - "INTEGER", - "SMALLINT", - "TINYINT", - "DOUBLE", - "FLOAT", - "DECIMAL", - "HUGEINT", - "REAL", - "NUMERIC", - } - - for col in columns: - col_name, col_type = col[0], col[1].upper() - output_parts.append(f"\n--- {col_name} ({col[1]}) ---") - - # Check base type (strip parameterized parts) - base_type = re.sub(r"\(.*\)", "", col_type).strip() - - if base_type in numeric_types: - try: - stats = con.execute(f""" - SELECT - COUNT("{col_name}") as count, - AVG("{col_name}")::DOUBLE as mean, - STDDEV("{col_name}")::DOUBLE as std, - MIN("{col_name}") as min, - QUANTILE_CONT("{col_name}", 0.25) as q25, - MEDIAN("{col_name}") as median, - QUANTILE_CONT("{col_name}", 0.75) as q75, - MAX("{col_name}") as max, - COUNT(*) - COUNT("{col_name}") as null_count - FROM "{resolved}" - """).fetchone() - labels = [ - "count", - "mean", - "std", - "min", - "25%", - "50%", - "75%", - "max", - "nulls", - ] - for label, val in zip(labels, stats): - if isinstance(val, float): - output_parts.append(f" {label:<8}: {val:,.4f}") - else: - output_parts.append(f" {label:<8}: {val}") - except Exception as e: - output_parts.append(f" Error computing stats: {e}") - else: - try: - stats = con.execute(f""" - SELECT - COUNT("{col_name}") as count, - COUNT(DISTINCT "{col_name}") as unique_count, - MODE("{col_name}") as mode_val, - COUNT(*) - COUNT("{col_name}") as null_count - FROM "{resolved}" - """).fetchone() - output_parts.append(f" count : {stats[0]}") - output_parts.append(f" unique : {stats[1]}") - output_parts.append(f" top : {stats[2]}") - output_parts.append(f" nulls : {stats[3]}") - - # Show top 5 values - top_vals = con.execute(f""" - SELECT "{col_name}", COUNT(*) as freq - FROM "{resolved}" - WHERE "{col_name}" IS NOT NULL - GROUP BY "{col_name}" - ORDER BY freq DESC - LIMIT 5 - """).fetchall() - if top_vals: - output_parts.append(f" top values:") - for val, freq in top_vals: - pct = (freq / row_count * 100) if row_count > 0 else 0 - output_parts.append(f" {val}: {freq} ({pct:.1f}%)") - except Exception as e: - output_parts.append(f" Error computing stats: {e}") - - result = "\n".join(output_parts) - print(result) - return result - - -def main(): - parser = argparse.ArgumentParser(description="Analyze Excel/CSV files using DuckDB") - parser.add_argument( - "--files", - nargs="+", - required=True, - help="Paths to Excel (.xlsx/.xls) or CSV files", - ) - parser.add_argument( - "--action", - required=True, - choices=["inspect", "query", "summary"], - help="Action to perform: inspect, query, or summary", - ) - parser.add_argument( - "--sql", - type=str, - default=None, - help="SQL query to execute (required for 'query' action)", - ) - parser.add_argument( - "--table", - type=str, - default=None, - help="Table name for summary (required for 'summary' action)", - ) - parser.add_argument( - "--output-file", - type=str, - default=None, - help="Path to export results (CSV/JSON/MD)", - ) - args = parser.parse_args() - - # Validate arguments - if args.action == "query" and not args.sql: - parser.error("--sql is required for 'query' action") - if args.action == "summary" and not args.table: - parser.error("--table is required for 'summary' action") - - # Compute file hash for caching - 
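    # The combined SHA256 of the input files keys a persistent DuckDB database
    # in CACHE_DIR, so re-running the script on unchanged files skips the load
    # step and reuses the cached tables (see save_table_map/load_table_map above).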
files_hash = compute_files_hash(args.files) - db_path = get_cache_db_path(files_hash) - cached_table_map = load_table_map(files_hash) - - if cached_table_map and os.path.exists(db_path): - # Cache hit: connect to existing DB - logger.info(f"Cache hit! Using cached database: {db_path}") - con = duckdb.connect(db_path, read_only=True) - table_map = cached_table_map - logger.info( - f"Loaded {len(table_map)} table(s) from cache: {', '.join(table_map.keys())}" - ) - else: - # Cache miss: load files and persist to DB - logger.info("Loading files (first time, will cache for future use)...") - con = duckdb.connect(db_path) - table_map = load_files(con, args.files) - - if not table_map: - logger.error("No tables were loaded. Check file paths and formats.") - # Clean up empty DB file - con.close() - if os.path.exists(db_path): - os.remove(db_path) - sys.exit(1) - - # Save table map for future cache lookups - save_table_map(files_hash, table_map) - logger.info( - f"\nLoaded {len(table_map)} table(s): {', '.join(table_map.keys())}" - ) - logger.info(f"Cached database saved to: {db_path}") - - # Perform action - if args.action == "inspect": - action_inspect(con, table_map) - elif args.action == "query": - action_query(con, args.sql, table_map, args.output_file) - elif args.action == "summary": - action_summary(con, args.table, table_map) - - con.close() - - -if __name__ == "__main__": - main() diff --git a/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/validate_echarts.py b/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/validate_echarts.py deleted file mode 100644 index b6258f63..00000000 --- a/libs/hexagent_demo/backend/skills/examples/data-insight-report/scripts/validate_echarts.py +++ /dev/null @@ -1,649 +0,0 @@ -""" -ECharts Configuration Validator. - -Validates ECharts JSON configurations for syntax, structure, data integrity, and layout. -Used in the data-insight-report workflow to ensure chart configurations are valid before -embedding in reports. 
-""" - -import argparse -import json -import re -import sys -from dataclasses import dataclass, field -from enum import Enum -from typing import Any - - -class ValidationLevel(Enum): - ERROR = "ERROR" # Must fix - chart will not render - WARNING = "WARNING" # Should fix - may cause issues - INFO = "INFO" # Best practice suggestion - - -@dataclass -class ValidationResult: - """Result of a single validation check.""" - level: ValidationLevel - category: str - check: str - status: bool - message: str = "" - - -@dataclass -class ChartValidationReport: - """Complete validation report for an ECharts configuration.""" - chart_title: str - chart_type: str - results: list[ValidationResult] = field(default_factory=list) - - @property - def passed(self) -> bool: - """Check if all ERROR level validations passed.""" - return all(r.status or r.level != ValidationLevel.ERROR for r in self.results) - - @property - def errors(self) -> list[ValidationResult]: - """Get all ERROR level failures.""" - return [r for r in self.results if not r.status and r.level == ValidationLevel.ERROR] - - @property - def warnings(self) -> list[ValidationResult]: - """Get all WARNING level failures.""" - return [r for r in self.results if not r.status and r.level == ValidationLevel.WARNING] - - def add_result(self, level: ValidationLevel, category: str, check: str, - status: bool, message: str = "") -> None: - """Add a validation result to the report.""" - self.results.append(ValidationResult(level, category, check, status, message)) - - def to_markdown(self) -> str: - """Generate a Markdown report.""" - lines = [] - lines.append(f"## 校验报告 - {self.chart_title}") - lines.append("") - - # Group by category - categories = {} - for r in self.results: - if r.category not in categories: - categories[r.category] = [] - categories[r.category].append(r) - - for category, results in categories.items(): - lines.append(f"### {category}") - for r in results: - icon = "✅" if r.status else ("❌" if r.level == ValidationLevel.ERROR else "⚠️") - lines.append(f"- {icon} {r.check}: {'通过' if r.status else r.message}") - lines.append("") - - # Summary - lines.append("### 校验结果") - if self.passed: - lines.append("✅ **通过**") - else: - lines.append(f"❌ **失败** - {len(self.errors)} 个错误, {len(self.warnings)} 个警告") - if self.errors: - lines.append("") - lines.append("**需要修复的错误:**") - for e in self.errors: - lines.append(f"- {e.check}: {e.message}") - - return "\n".join(lines) - - -# Valid ECharts chart types -VALID_CHART_TYPES = { - "line", "bar", "pie", "scatter", "effectScatter", "radar", "tree", - "treemap", "sunburst", "boxplot", "candlestick", "heatmap", "map", - "parallel", "lines", "graph", "sankey", "funnel", "gauge", "pictorialBar", - "themeRiver", "custom" -} - -# Valid trigger types for tooltip -VALID_TOOLTIP_TRIGGERS = {"item", "axis", "none"} - -# Recommended max lengths -MAX_TITLE_LENGTH = 30 -MAX_LEGEND_ITEMS = 10 -MAX_SERIES_ITEMS = 1000 - - -def validate_json_syntax(config: str) -> tuple[dict | None, list[ValidationResult]]: - """Validate JSON syntax and common issues.""" - results = [] - - # Check 1: Parse as JSON - try: - data = json.loads(config) - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "JSON解析", True - )) - except json.JSONDecodeError as e: - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "JSON解析", False, f"Invalid JSON: {e}" - )) - return None, results - - # Check 2: No single quotes (should use double quotes) - if "'" in config and '"' in config: - # Mixed quotes - 
results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "引号检查", False, - "Mixed single and double quotes detected" - )) - elif "'" in config and '"' not in config: - # Only single quotes - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "引号检查", False, - "Use double quotes instead of single quotes" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "JSON语法校验", "引号检查", True - )) - - # Check 3: No trailing commas - trailing_comma_pattern = r',\s*[\]\}]' - trailing_commas = re.findall(trailing_comma_pattern, config) - if trailing_commas: - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "尾逗号检查", False, - f"Found {len(trailing_commas)} trailing comma(s)" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "JSON语法校验", "尾逗号检查", True - )) - - # Check 4: No JavaScript functions - function_patterns = [ - r'function\s*\(', - r'=>\s*\{', - r'=>\s*[^,\}\]]+\s*\(', - ] - has_function = False - for pattern in function_patterns: - if re.search(pattern, config): - has_function = True - break - - if has_function: - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "函数检查", False, - "JavaScript functions are not allowed in JSON" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "JSON语法校验", "函数检查", True, "无JavaScript函数" - )) - - # Check 5: No undefined - if "undefined" in config.lower(): - results.append(ValidationResult( - ValidationLevel.ERROR, "JSON语法校验", "undefined检查", False, - "'undefined' is not valid JSON" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "JSON语法校验", "undefined检查", True - )) - - return data, results - - -def validate_echarts_structure(data: dict) -> list[ValidationResult]: - """Validate ECharts configuration structure.""" - results = [] - - # Check 1: Required fields - has_title = "title" in data - has_tooltip = "tooltip" in data - has_series = "series" in data and isinstance(data.get("series"), list) and len(data["series"]) > 0 - - if has_title: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", "title字段", True - )) - else: - results.append(ValidationResult( - ValidationLevel.WARNING, "ECharts结构校验", "title字段", False, - "Missing 'title' field (recommended)" - )) - - if has_tooltip: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", "tooltip字段", True - )) - else: - results.append(ValidationResult( - ValidationLevel.WARNING, "ECharts结构校验", "tooltip字段", False, - "Missing 'tooltip' field (recommended)" - )) - - if has_series: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", "series字段", True - )) - else: - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", "series字段", False, - "Missing or empty 'series' field (required)" - )) - return results # Can't continue without series - - # Check 2: Valid chart types - series = data.get("series", []) - chart_types = set() - for i, s in enumerate(series): - chart_type = s.get("type", "").lower() - if chart_type: - chart_types.add(chart_type) - if chart_type in VALID_CHART_TYPES: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", f"series[{i}].type", True, - f"'{chart_type}' - 有效" - )) - else: - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", f"series[{i}].type", False, - f"'{chart_type}' is not a valid ECharts chart type" - )) - - # Check 3: Data exists in series - for i, s in enumerate(series): - chart_type = s.get("type", "").lower() - data_field = 
s.get("data") - - if data_field is None: - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", f"series[{i}].data", False, - "Missing 'data' field" - )) - elif not isinstance(data_field, list): - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", f"series[{i}].data", False, - "'data' must be an array" - )) - elif len(data_field) == 0: - results.append(ValidationResult( - ValidationLevel.WARNING, "ECharts结构校验", f"series[{i}].data", False, - "'data' array is empty" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", f"series[{i}].data", True, - f"{len(data_field)} 个数据点" - )) - - # Check 4: Axis configuration for bar/line charts - needs_axis = chart_types & {"line", "bar", "scatter", "effectScatter", "boxplot", "candlestick"} - if needs_axis: - has_xaxis = "xAxis" in data - has_yaxis = "yAxis" in data - - if has_xaxis: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", "xAxis配置", True - )) - else: - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", "xAxis配置", False, - f"'{needs_axis}' charts require xAxis" - )) - - if has_yaxis: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", "yAxis配置", True - )) - else: - results.append(ValidationResult( - ValidationLevel.ERROR, "ECharts结构校验", "yAxis配置", False, - f"'{needs_axis}' charts require yAxis" - )) - - # Check 5: Pie chart data format - if "pie" in chart_types: - for i, s in enumerate(series): - if s.get("type", "").lower() == "pie": - pie_data = s.get("data", []) - if pie_data and isinstance(pie_data, list) and len(pie_data) > 0: - # Check if data items have name and value - first_item = pie_data[0] - if isinstance(first_item, dict): - if "name" in first_item and "value" in first_item: - results.append(ValidationResult( - ValidationLevel.INFO, "ECharts结构校验", f"series[{i}] 饼图数据格式", True - )) - else: - results.append(ValidationResult( - ValidationLevel.WARNING, "ECharts结构校验", f"series[{i}] 饼图数据格式", False, - "Pie data items should have 'name' and 'value' properties" - )) - - return results - - -def validate_data_integrity(data: dict) -> list[ValidationResult]: - """Validate data integrity and consistency.""" - results = [] - - series = data.get("series", []) - if not series: - return results - - # Check xAxis data length matches series data length for bar/line - x_axis = data.get("xAxis", {}) - if isinstance(x_axis, list): - x_axis = x_axis[0] if x_axis else {} - - x_data = x_axis.get("data", []) if isinstance(x_axis, dict) else [] - - for i, s in enumerate(series): - chart_type = s.get("type", "").lower() - s_data = s.get("data", []) - - if chart_type in {"line", "bar"} and x_data: - if len(x_data) != len(s_data): - results.append(ValidationResult( - ValidationLevel.ERROR, "数据完整性校验", f"series[{i}] 数据长度匹配", False, - f"xAxis has {len(x_data)} items, but series[{i}] has {len(s_data)} items" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "数据完整性校验", f"series[{i}] 数据长度匹配", True - )) - - # Check for NaN/Infinity in data - if s_data: - has_nan = False - has_inf = False - has_string_number = False - - for item in s_data: - if isinstance(item, dict): - val = item.get("value") - else: - val = item - - if isinstance(val, float): - import math - if math.isnan(val): - has_nan = True - if math.isinf(val): - has_inf = True - elif isinstance(val, str) and val not in ("", None): - # Check if it's a string that looks like a number - try: - float(val) - has_string_number = True - except (ValueError, 
TypeError): - pass - - if has_nan: - results.append(ValidationResult( - ValidationLevel.ERROR, "数据完整性校验", f"series[{i}] NaN检查", False, - "Data contains NaN values" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "数据完整性校验", f"series[{i}] NaN检查", True - )) - - if has_inf: - results.append(ValidationResult( - ValidationLevel.ERROR, "数据完整性校验", f"series[{i}] Infinity检查", False, - "Data contains Infinity values" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "数据完整性校验", f"series[{i}] Infinity检查", True - )) - - if has_string_number: - results.append(ValidationResult( - ValidationLevel.WARNING, "数据完整性校验", f"series[{i}] 数值类型检查", False, - "Some numeric values are strings, should be numbers" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "数据完整性校验", f"series[{i}] 数值类型检查", True, - "所有数值为number类型" - )) - - return results - - -def validate_layout(data: dict) -> list[ValidationResult]: - """Validate layout configuration.""" - results = [] - - # Check for overly complex grid configuration - grid = data.get("grid", {}) - if grid: - # Check for manual positioning - manual_props = ["left", "right", "top", "bottom"] - set_props = [p for p in manual_props if p in grid and grid[p]] - - if len(set_props) >= 3: - results.append(ValidationResult( - ValidationLevel.WARNING, "布局校验", "grid配置", False, - f"Complex manual grid positioning ({', '.join(set_props)}), consider using containLabel: true instead" - )) - elif grid.get("containLabel"): - results.append(ValidationResult( - ValidationLevel.INFO, "布局校验", "grid配置", True, "使用 containLabel: true" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "布局校验", "grid配置", True, "使用默认布局" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "布局校验", "grid配置", True, "使用默认布局" - )) - - # Check title length - title = data.get("title", {}) - if isinstance(title, dict): - title_text = title.get("text", "") - if title_text: - if len(title_text) > MAX_TITLE_LENGTH: - results.append(ValidationResult( - ValidationLevel.WARNING, "布局校验", "标题长度", False, - f"Title is {len(title_text)} chars, consider keeping under {MAX_TITLE_LENGTH}" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "布局校验", "标题长度", True, f"{len(title_text)}字符 - 合适" - )) - - # Check legend items count - legend = data.get("legend", {}) - if isinstance(legend, dict): - legend_data = legend.get("data", []) - if legend_data: - if len(legend_data) > MAX_LEGEND_ITEMS: - results.append(ValidationResult( - ValidationLevel.WARNING, "布局校验", "图例项数", False, - f"{len(legend_data)} items, consider keeping under {MAX_LEGEND_ITEMS}" - )) - else: - results.append(ValidationResult( - ValidationLevel.INFO, "布局校验", "图例项数", True, f"{len(legend_data)}项 - 合理" - )) - - # Check series data size - series = data.get("series", []) - for i, s in enumerate(series): - s_data = s.get("data", []) - if s_data and len(s_data) > MAX_SERIES_ITEMS: - results.append(ValidationResult( - ValidationLevel.WARNING, "布局校验", f"series[{i}] 数据量", False, - f"{len(s_data)} items, large datasets may affect performance" - )) - - return results - - -def validate_echarts_config(config: str | dict) -> ChartValidationReport: - """ - Validate an ECharts configuration. 
- - Args: - config: ECharts configuration as JSON string or dict - - Returns: - ChartValidationReport with all validation results - """ - # Parse config if string - if isinstance(config, str): - config_str = config - data, syntax_results = validate_json_syntax(config_str) - else: - data = config - config_str = json.dumps(config, ensure_ascii=False, indent=2) - syntax_results = [ValidationResult( - ValidationLevel.INFO, "JSON语法校验", "JSON解析", True, "已解析为dict对象" - )] - - # Create report - if data: - chart_title = data.get("title", {}).get("text", "未命名图表") - chart_type = data.get("series", [{}])[0].get("type", "unknown") if data.get("series") else "unknown" - else: - chart_title = "解析失败" - chart_type = "unknown" - - report = ChartValidationReport(chart_title, chart_type) - report.results.extend(syntax_results) - - if data is None: - return report - - # Run all validations - report.results.extend(validate_echarts_structure(data)) - report.results.extend(validate_data_integrity(data)) - report.results.extend(validate_layout(data)) - - return report - - -def validate_file(file_path: str) -> ChartValidationReport: - """ - Validate an ECharts configuration from a file. - - Args: - file_path: Path to file containing ECharts JSON config - - Returns: - ChartValidationReport with all validation results - """ - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - - # Try to extract echarts code block if present - echarts_pattern = r'```echarts\s*\n(.*?)\n```' - match = re.search(echarts_pattern, content, re.DOTALL) - if match: - config_str = match.group(1) - else: - config_str = content.strip() - - return validate_echarts_config(config_str) - - -def main(): - parser = argparse.ArgumentParser( - description="Validate ECharts JSON configurations", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Validate a JSON file - python validate_echarts.py config.json - - # Validate a Markdown file with echarts code block - python validate_echarts.py chart.md - - # Validate JSON from stdin - echo '{"title":{"text":"test"},"series":[{"type":"bar","data":[1,2,3]}]}' | python validate_echarts.py - - - # Output as JSON - python validate_echarts.py config.json --format json - """ - ) - parser.add_argument( - "file", - help="Path to file containing ECharts config (JSON or Markdown with ```echarts block), or '-' for stdin" - ) - parser.add_argument( - "--format", "-f", - choices=["markdown", "json"], - default="markdown", - help="Output format (default: markdown)" - ) - parser.add_argument( - "--strict", "-s", - action="store_true", - help="Exit with error code on any validation failure (including warnings)" - ) - - args = parser.parse_args() - - # Read input - if args.file == "-": - content = sys.stdin.read() - # Try to extract echarts block - echarts_pattern = r'```echarts\s*\n(.*?)\n```' - match = re.search(echarts_pattern, content, re.DOTALL) - config_str = match.group(1) if match else content.strip() - report = validate_echarts_config(config_str) - else: - report = validate_file(args.file) - - # Output results - if args.format == "json": - output = { - "chart_title": report.chart_title, - "chart_type": report.chart_type, - "passed": report.passed, - "error_count": len(report.errors), - "warning_count": len(report.warnings), - "results": [ - { - "level": r.level.value, - "category": r.category, - "check": r.check, - "status": r.status, - "message": r.message - } - for r in report.results - ] - } - print(json.dumps(output, ensure_ascii=False, indent=2)) - else: - 
print(report.to_markdown()) - - # Exit code - if not report.passed: - sys.exit(1) - if args.strict and report.warnings: - sys.exit(2) - - -def setup_encoding(): - """Setup UTF-8 encoding for Windows console.""" - import sys - import io - if sys.platform == 'win32': - try: - sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') - sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace') - except Exception: - pass - - -if __name__ == "__main__": - setup_encoding() - main() diff --git a/libs/hexagent_demo/backend/skills/examples/deep-research/SKILL.md b/libs/hexagent_demo/backend/skills/examples/deep-research/SKILL.md deleted file mode 100644 index d78ad185..00000000 --- a/libs/hexagent_demo/backend/skills/examples/deep-research/SKILL.md +++ /dev/null @@ -1,201 +0,0 @@ ---- -name: deep-research -description: Use this skill instead of WebSearch for ANY question requiring web research. Trigger on queries like "what is X", "explain X", "compare X and Y", "research X", or before content generation tasks. Provides systematic multi-angle research methodology instead of single superficial searches. Use this proactively when the user's question needs online information. -metadata: - author: deer-flow - url: https://github.com/bytedance/deer-flow/tree/main/skills/public ---- - -# Deep Research Skill - -## Overview - -This skill provides a systematic methodology for conducting thorough web research. **Load this skill BEFORE starting any content generation task** to ensure you gather sufficient information from multiple angles, depths, and sources. - -## When to Use This Skill - -**Always load this skill when:** - -### Research Questions -- User asks "what is X", "explain X", "research X", "investigate X" -- User wants to understand a concept, technology, or topic in depth -- The question requires current, comprehensive information from multiple sources -- A single web search would be insufficient to answer properly - -### Content Generation (Pre-research) -- Creating presentations (PPT/slides) -- Creating frontend designs or UI mockups -- Writing articles, reports, or documentation -- Producing videos or multimedia content -- Any content that requires real-world information, examples, or current data - -## Core Principle - -**Never generate content based solely on general knowledge.** The quality of your output directly depends on the quality and quantity of research conducted beforehand. A single search query is NEVER enough. - -## Research Methodology - -### Phase 1: Broad Exploration - -Start with broad searches to understand the landscape: - -1. **Initial Survey**: Search for the main topic to understand the overall context -2. **Identify Dimensions**: From initial results, identify key subtopics, themes, angles, or aspects that need deeper exploration -3. **Map the Territory**: Note different perspectives, stakeholders, or viewpoints that exist - -Example: -``` -Topic: "AI in healthcare" -Initial searches: -- "AI healthcare applications 2024" -- "artificial intelligence medical diagnosis" -- "healthcare AI market trends" - -Identified dimensions: -- Diagnostic AI (radiology, pathology) -- Treatment recommendation systems -- Administrative automation -- Patient monitoring -- Regulatory landscape -- Ethical considerations -``` - -### Phase 2: Deep Dive - -For each important dimension identified, conduct targeted research: - -1. **Specific Queries**: Search with precise keywords for each subtopic -2. 
**Multiple Phrasings**: Try different keyword combinations and phrasings
-3. **Fetch Full Content**: Use `web_fetch` to read important sources in full, not just snippets
-4. **Follow References**: When sources mention other important resources, search for those too
-
-Example:
-```
-Dimension: "Diagnostic AI in radiology"
-Targeted searches:
-- "AI radiology FDA approved systems"
-- "chest X-ray AI detection accuracy"
-- "radiology AI clinical trials results"
-
-Then fetch and read:
-- Key research papers or summaries
-- Industry reports
-- Real-world case studies
-```
-
-### Phase 3: Diversity & Validation
-
-Ensure comprehensive coverage by seeking diverse information types:
-
-| Information Type | Purpose | Example Searches |
-|-----------------|---------|------------------|
-| **Facts & Data** | Concrete evidence | "statistics", "data", "numbers", "market size" |
-| **Examples & Cases** | Real-world applications | "case study", "example", "implementation" |
-| **Expert Opinions** | Authority perspectives | "expert analysis", "interview", "commentary" |
-| **Trends & Predictions** | Future direction | "trends 2024", "forecast", "future of" |
-| **Comparisons** | Context and alternatives | "vs", "comparison", "alternatives" |
-| **Challenges & Criticisms** | Balanced view | "challenges", "limitations", "criticism" |
-
-### Phase 4: Synthesis Check
-
-Before proceeding to content generation, verify:
-
-- [ ] Have I searched from at least 3-5 different angles?
-- [ ] Have I fetched and read the most important sources in full?
-- [ ] Do I have concrete data, examples, and expert perspectives?
-- [ ] Have I explored both positive aspects and challenges/limitations?
-- [ ] Is my information current and from authoritative sources?
-
-**If any answer is NO, continue researching before generating content.**
-
-## Search Strategy Tips
-
-### Effective Query Patterns
-
-```
-# Be specific with context
-❌ "AI trends"
-✅ "enterprise AI adoption trends 2024"
-
-# Include authoritative source hints
-"[topic] research paper"
-"[topic] McKinsey report"
-"[topic] industry analysis"
-
-# Search for specific content types
-"[topic] case study"
-"[topic] statistics"
-"[topic] expert interview"
-
-# Use temporal qualifiers — always use the ACTUAL current year from your context
-"[topic] 2026"  # ← replace with the real current year, never hardcode a past year
-"[topic] latest"
-"[topic] recent developments"
-```
-
-### Temporal Awareness
-
-**Always check the current date provided in your context before forming ANY search query.**
-
-The context date gives you the full date: year, month, day, and weekday (e.g. `2026-02-28, Saturday`).
Use the right level of precision depending on what the user is asking: - -| User intent | Temporal precision needed | Example query | -|---|---|---| -| "today / this morning / just released" | **Month + Day** | `"tech news February 28 2026"` | -| "this week" | **Week range** | `"technology releases week of Feb 24 2026"` | -| "recently / latest / new" | **Month** | `"AI breakthroughs February 2026"` | -| "this year / trends" | **Year** | `"software trends 2026"` | - -**Rules:** -- When the user asks about "today" or "just released", use **month + day + year** in your search queries to get same-day results -- Never drop to year-only when day-level precision is needed — `"tech news 2026"` will NOT surface today's news -- Try multiple phrasings: numeric form (`2026-02-28`), written form (`February 28 2026`), and relative terms (`today`, `this week`) across different queries - -❌ User asks "what's new in tech today" → searching `"new technology 2026"` → misses today's news -✅ User asks "what's new in tech today" → searching `"new technology February 28 2026"` + `"tech news today Feb 28"` → gets today's results - -### When to Use web_fetch - -Use `web_fetch` to read full content when: -- A search result looks highly relevant and authoritative -- You need detailed information beyond the snippet -- The source contains data, case studies, or expert analysis -- You want to understand the full context of a finding - -### Iterative Refinement - -Research is iterative. After initial searches: -1. Review what you've learned -2. Identify gaps in your understanding -3. Formulate new, more targeted queries -4. Repeat until you have comprehensive coverage - -## Quality Bar - -Your research is sufficient when you can confidently answer: -- What are the key facts and data points? -- What are 2-3 concrete real-world examples? -- What do experts say about this topic? -- What are the current trends and future directions? -- What are the challenges or limitations? -- What makes this topic relevant or important now? - -## Common Mistakes to Avoid - -- ❌ Stopping after 1-2 searches -- ❌ Relying on search snippets without reading full sources -- ❌ Searching only one aspect of a multi-faceted topic -- ❌ Ignoring contradicting viewpoints or challenges -- ❌ Using outdated information when current data exists -- ❌ Starting content generation before research is complete - -## Output - -After completing research, you should have: -1. A comprehensive understanding of the topic from multiple angles -2. Specific facts, data points, and statistics -3. Real-world examples and case studies -4. Expert perspectives and authoritative sources -5. Current trends and relevant context - -**Only then proceed to content generation**, using the gathered information to create high-quality, well-informed content. diff --git a/libs/hexagent_demo/backend/skills/examples/doc/LICENSE.txt b/libs/hexagent_demo/backend/skills/examples/doc/LICENSE.txt deleted file mode 100644 index 13e25df8..00000000 --- a/libs/hexagent_demo/backend/skills/examples/doc/LICENSE.txt +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. 
This License does not grant permission to use the trade
-   names, trademarks, service marks, or product names of the Licensor,
-   except as required for reasonable and customary use in describing the
-   origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
-   agreed to in writing, Licensor provides the Work (and each
-   Contributor provides its Contributions) on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-   implied, including, without limitation, any warranties or conditions
-   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-   PARTICULAR PURPOSE. You are solely responsible for determining the
-   appropriateness of using or redistributing the Work and assume any
-   risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory,
-   whether in tort (including negligence), contract, or otherwise,
-   unless required by applicable law (such as deliberate and grossly
-   negligent acts) or agreed to in writing, shall any Contributor be
-   liable to You for damages, including any direct, indirect, special,
-   incidental, or consequential damages of any character arising as a
-   result of this License or out of the use or inability to use the
-   Work (including but not limited to damages for loss of goodwill,
-   work stoppage, computer failure or malfunction, or any and all
-   other commercial damages or losses), even if such Contributor
-   has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing
-   the Work or Derivative Works thereof, You may choose to offer,
-   and charge a fee for, acceptance of support, warranty, indemnity,
-   or other liability obligations and/or rights consistent with this
-   License. However, in accepting such obligations, You may act only
-   on Your own behalf and on Your sole responsibility, not on behalf of
-   any other Contributor, and only if You agree to indemnify,
-   defend, and hold each Contributor harmless for any liability
-   incurred by, or claims asserted against, such Contributor by reason
-   of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work.
-
-   To apply the Apache License to your work, attach the following
-   boilerplate notice, with the fields enclosed by brackets "[]"
-   replaced with your own identifying information. (Don't include
-   the brackets!) The text should be enclosed in the appropriate
-   comment syntax for the file format. We also recommend that a
-   file or class name and description of purpose be included on the
-   same "printed page" as the copyright notice for easier
-   identification within third-party archives.
-
-Copyright [yyyy] [name of copyright owner]
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/libs/hexagent_demo/backend/skills/examples/doc/SKILL.md b/libs/hexagent_demo/backend/skills/examples/doc/SKILL.md deleted file mode 100644 index b31f0312..00000000 --- a/libs/hexagent_demo/backend/skills/examples/doc/SKILL.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -name: "doc" -description: "Use when the task involves reading, creating, or editing `.docx` documents, especially when formatting or layout fidelity matters; prefer `python-docx` plus the bundled `scripts/render_docx.py` for visual checks." -metadata: - author: openai - url: https://github.com/openai/skills/tree/main/skills/.curated ---- - - -# DOCX Skill - -## When to use -- Read or review DOCX content where layout matters (tables, diagrams, pagination). -- Create or edit DOCX files with professional formatting. -- Validate visual layout before delivery. - -## Workflow -1. Prefer visual review (layout, tables, diagrams). - - If `soffice` and `pdftoppm` are available, convert DOCX -> PDF -> PNGs. - - Or use `scripts/render_docx.py` (requires `pdf2image` and Poppler). - - If these tools are missing, install them or ask the user to review rendered pages locally. -2. Use `python-docx` for edits and structured creation (headings, styles, tables, lists). -3. After each meaningful change, re-render and inspect the pages. -4. If visual review is not possible, extract text with `python-docx` as a fallback and call out layout risk. -5. Keep intermediate outputs organized and clean up after final approval. - -## Temp and output conventions -- Use `tmp/docs/` for intermediate files; delete when done. -- Write final artifacts under `output/doc/` when working in this repo. -- Keep filenames stable and descriptive. - -## Dependencies (install if missing) -Prefer `uv` for dependency management. - -Python packages: -``` -uv pip install python-docx pdf2image -``` -If `uv` is unavailable: -``` -python3 -m pip install python-docx pdf2image -``` -System tools (for rendering): -``` -# macOS (Homebrew) -brew install libreoffice poppler - -# Ubuntu/Debian -sudo apt-get install -y libreoffice poppler-utils -``` - -If installation isn't possible in this environment, tell the user which dependency is missing and how to install it locally. - -## Environment -No required environment variables. - -## Rendering commands -DOCX -> PDF: -``` -soffice -env:UserInstallation=file:///tmp/lo_profile_$$ --headless --convert-to pdf --outdir $OUTDIR $INPUT_DOCX -``` - -PDF -> PNGs: -``` -pdftoppm -png $OUTDIR/$BASENAME.pdf $OUTDIR/$BASENAME -``` - -Bundled helper: -``` -python3 scripts/render_docx.py /path/to/file.docx --output_dir /tmp/docx_pages -``` - -## Quality expectations -- Deliver a client-ready document: consistent typography, spacing, margins, and clear hierarchy. -- Avoid formatting defects: clipped/overlapping text, broken tables, unreadable characters, or default-template styling. -- Charts, tables, and visuals must be legible in rendered pages with correct alignment. -- Use ASCII hyphens only. Avoid U+2011 (non-breaking hyphen) and other Unicode dashes. -- Citations and references must be human-readable; never leave tool tokens or placeholder strings. - -## Final checks -- Re-render and inspect every page at 100% zoom before final delivery. -- Fix any spacing, alignment, or pagination issues and repeat the render loop. -- Confirm there are no leftovers (temp files, duplicate renders) unless the user asks to keep them. 
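For the `python-docx` editing step in the workflow above, here is a minimal sketch; the heading text, table style name, and output path are illustrative assumptions, not part of the skill:

```python
from pathlib import Path

from docx import Document

# Build a small document with a heading, a paragraph, and a table,
# then save it under the skill's output convention for re-rendering.
Path("output/doc").mkdir(parents=True, exist_ok=True)

doc = Document()  # pass a path instead to edit an existing .docx
doc.add_heading("Quarterly Summary", level=1)
doc.add_paragraph("Revenue grew in Q3, driven by subscription renewals.")

table = doc.add_table(rows=1, cols=2)
table.style = "Light Grid Accent 1"  # assumed built-in style; availability depends on the template
header = table.rows[0].cells
header[0].text, header[1].text = "Metric", "Value"
row = table.add_row().cells
row[0].text, row[1].text = "Revenue", "$1.2M"

doc.save("output/doc/summary.docx")
```

After saving, re-render with `scripts/render_docx.py` and inspect the pages before delivery, per the workflow above.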
diff --git a/libs/hexagent_demo/backend/skills/examples/doc/agents/openai.yaml b/libs/hexagent_demo/backend/skills/examples/doc/agents/openai.yaml deleted file mode 100644 index 27ce451b..00000000 --- a/libs/hexagent_demo/backend/skills/examples/doc/agents/openai.yaml +++ /dev/null @@ -1,6 +0,0 @@ -interface: - display_name: "Word Docs" - short_description: "Edit and review docx files" - icon_small: "./assets/doc-small.svg" - icon_large: "./assets/doc.png" - default_prompt: "Edit or review this .docx file and return the updated file plus a concise change summary." diff --git a/libs/hexagent_demo/backend/skills/examples/doc/assets/doc-small.svg b/libs/hexagent_demo/backend/skills/examples/doc/assets/doc-small.svg deleted file mode 100644 index 97289eb2..00000000 --- a/libs/hexagent_demo/backend/skills/examples/doc/assets/doc-small.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/libs/hexagent_demo/backend/skills/examples/doc/assets/doc.png b/libs/hexagent_demo/backend/skills/examples/doc/assets/doc.png deleted file mode 100644 index e1651789..00000000 Binary files a/libs/hexagent_demo/backend/skills/examples/doc/assets/doc.png and /dev/null differ diff --git a/libs/hexagent_demo/backend/skills/examples/doc/scripts/render_docx.py b/libs/hexagent_demo/backend/skills/examples/doc/scripts/render_docx.py deleted file mode 100644 index 907ec89c..00000000 --- a/libs/hexagent_demo/backend/skills/examples/doc/scripts/render_docx.py +++ /dev/null @@ -1,296 +0,0 @@ -import argparse -import os -import re -import subprocess -import tempfile -import xml.etree.ElementTree as ET -from os import makedirs, replace -from os.path import abspath, basename, exists, expanduser, join, splitext -from shutil import which -import sys -from typing import Sequence, cast -from zipfile import ZipFile - -from pdf2image import convert_from_path, pdfinfo_from_path - -TWIPS_PER_INCH: int = 1440 - - -def ensure_system_tools() -> None: - missing: list[str] = [] - for tool in ("soffice", "pdftoppm"): - if which(tool) is None: - missing.append(tool) - if missing: - tools = ", ".join(missing) - raise RuntimeError( - f"Missing required system tool(s): {tools}. Install LibreOffice and Poppler, then retry." - ) - - -def calc_dpi_via_ooxml_docx(input_path: str, max_w_px: int, max_h_px: int) -> int: - """Calculate DPI from OOXML `word/document.xml` page size (w:pgSz in twips). - - DOCX stores page dimensions in section properties as twips (1/1440 inch). - We read the first encountered section's page size and compute an isotropic DPI - that fits within the target max pixel dimensions. 
- """ - with ZipFile(input_path, "r") as zf: - xml = zf.read("word/document.xml") - root = ET.fromstring(xml) - ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"} - - # Common placements: w:body/w:sectPr or w:body/w:p/w:pPr/w:sectPr - sect_pr = root.find(".//w:sectPr", ns) - if sect_pr is None: - raise RuntimeError("Section properties not found in document.xml") - pg_sz = sect_pr.find("w:pgSz", ns) - if pg_sz is None: - raise RuntimeError("Page size not found in section properties") - - # Values are in twips - w_twips_str = pg_sz.get( - "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}w" - ) or pg_sz.get("w") - h_twips_str = pg_sz.get( - "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}h" - ) or pg_sz.get("h") - - if not w_twips_str or not h_twips_str: - raise RuntimeError("Page size attributes missing in pgSz") - - width_in = int(w_twips_str) / TWIPS_PER_INCH - height_in = int(h_twips_str) / TWIPS_PER_INCH - if width_in <= 0 or height_in <= 0: - raise RuntimeError("Invalid page size values in document.xml") - return round(min(max_w_px / width_in, max_h_px / height_in)) - - -def calc_dpi_via_pdf(input_path: str, max_w_px: int, max_h_px: int) -> int: - """Convert input to PDF and compute DPI from its page size.""" - with tempfile.TemporaryDirectory(prefix="soffice_profile_") as user_profile: - with tempfile.TemporaryDirectory(prefix="soffice_convert_") as convert_tmp_dir: - stem = splitext(basename(input_path))[0] - pdf_path = convert_to_pdf(input_path, user_profile, convert_tmp_dir, stem) - if not (pdf_path and exists(pdf_path)): - raise RuntimeError("Failed to convert input to PDF for DPI computation.") - - info = pdfinfo_from_path(pdf_path) - size_val = info.get("Page size") - if not size_val: - for k, v in info.items(): - if isinstance(v, str) and "size" in k.lower() and "pts" in v: - size_val = v - break - if not isinstance(size_val, str): - raise RuntimeError("Failed to read PDF page size for DPI computation.") - - m = re.search(r"(\d+)\s*x\s*(\d+)\s*pts", size_val) - if not m: - raise RuntimeError("Unrecognized PDF page size format.") - width_pts = int(m.group(1)) - height_pts = int(m.group(2)) - width_in = width_pts / 72.0 - height_in = height_pts / 72.0 - if width_in <= 0 or height_in <= 0: - raise RuntimeError("Invalid PDF page size values.") - return round(min(max_w_px / width_in, max_h_px / height_in)) - - -def run_cmd_no_check(cmd: list[str]) -> None: - subprocess.run( - cmd, - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - env=os.environ.copy(), - ) - - -def convert_to_pdf( - doc_path: str, - user_profile: str, - convert_tmp_dir: str, - stem: str, -) -> str: - # Try direct DOC(X) -> PDF - cmd_pdf = [ - "soffice", - "-env:UserInstallation=file://" + user_profile, - "--invisible", - "--headless", - "--norestore", - "--convert-to", - "pdf", - "--outdir", - convert_tmp_dir, - doc_path, - ] - run_cmd_no_check(cmd_pdf) - - pdf_path = join(convert_tmp_dir, f"{stem}.pdf") - if exists(pdf_path): - return pdf_path - - # Fallback: DOCX -> ODT, then ODT -> PDF - cmd_odt = [ - "soffice", - "-env:UserInstallation=file://" + user_profile, - "--invisible", - "--headless", - "--norestore", - "--convert-to", - "odt", - "--outdir", - convert_tmp_dir, - doc_path, - ] - run_cmd_no_check(cmd_odt) - - odt_path = join(convert_tmp_dir, f"{stem}.odt") - - if exists(odt_path): - cmd_odt_pdf = [ - "soffice", - "-env:UserInstallation=file://" + user_profile, - "--invisible", - "--headless", - "--norestore", - "--convert-to", - 
"pdf", - "--outdir", - convert_tmp_dir, - odt_path, - ] - run_cmd_no_check(cmd_odt_pdf) - if exists(pdf_path): - return pdf_path - - return "" - - -def rasterize( - doc_path: str, - out_dir: str, - dpi: int, -) -> Sequence[str]: - """Rasterise DOCX (or similar) to images placed in out_dir and return their paths. - - Images are named as page-. with pages starting at 1. - """ - makedirs(out_dir, exist_ok=True) - doc_path = abspath(doc_path) - stem = splitext(basename(doc_path))[0] - - # Use a unique user profile to avoid LibreOffice profile lock when running concurrently - with tempfile.TemporaryDirectory(prefix="soffice_profile_") as user_profile: - # Write conversion outputs into a temp directory to avoid any IO oddities - with tempfile.TemporaryDirectory(prefix="soffice_convert_") as convert_tmp_dir: - pdf_path = convert_to_pdf( - doc_path, - user_profile, - convert_tmp_dir, - stem, - ) - - if not pdf_path or not exists(pdf_path): - raise RuntimeError( - "Failed to produce PDF for rasterization (direct and ODT fallback)." - ) - paths_raw = cast( - list[str], - convert_from_path( - pdf_path, - dpi=dpi, - fmt="png", - thread_count=8, - output_folder=out_dir, - paths_only=True, - output_file="page", - ), - ) - - # Rename convert_from_path's output format f'page{thread_id:04d}-{page_num:02d}.' to 'page-.' - pages: list[tuple[int, str]] = [] - for src_path in paths_raw: - base = splitext(basename(src_path))[0] - page_num_str = base.split("-")[-1] - page_num = int(page_num_str) - dst_path = join(out_dir, f"page-{page_num}.png") - replace(src_path, dst_path) - pages.append((page_num, dst_path)) - pages.sort(key=lambda t: t[0]) - final_paths = [path for _, path in pages] - return final_paths - - -def main() -> None: - parser = argparse.ArgumentParser(description="Render DOCX-like file to PNG images.") - parser.add_argument( - "input_path", - type=str, - help="Path to the input DOCX file (or compatible).", - ) - parser.add_argument( - "--output_dir", - type=str, - default=None, - help=( - "Output directory for the rendered images. " - "Defaults to a folder next to the input named after the input file (without extension)." - ), - ) - parser.add_argument( - "--width", - type=int, - default=1600, - help=( - "Approximate maximum width in pixels after isotropic scaling (default 1600). " - "The actual value may exceed slightly." - ), - ) - parser.add_argument( - "--height", - type=int, - default=2000, - help=( - "Approximate maximum height in pixels after isotropic scaling (default 2000). " - "The actual value may exceed slightly." - ), - ) - parser.add_argument( - "--dpi", - type=int, - default=None, - help=("Override computed DPI. 
If provided, skips DOCX/PDF-based DPI calculation."), - ) - args = parser.parse_args() - - try: - ensure_system_tools() - - input_path = abspath(expanduser(args.input_path)) - out_dir = ( - abspath(expanduser(args.output_dir)) if args.output_dir else splitext(input_path)[0] - ) - - if args.dpi is not None: - dpi = int(args.dpi) - else: - try: - if input_path.lower().endswith((".docx", ".docm", ".dotx", ".dotm")): - dpi = calc_dpi_via_ooxml_docx(input_path, args.width, args.height) - else: - raise RuntimeError("Skip OOXML DPI; not a DOCX container") - except Exception: - dpi = calc_dpi_via_pdf(input_path, args.width, args.height) - - rasterize(input_path, out_dir, dpi) - print("Pages rendered to " + out_dir) - except RuntimeError as exc: - print(f"Error: {exc}", file=sys.stderr) - raise SystemExit(1) - - -if __name__ == "__main__": - main() diff --git a/libs/hexagent_demo/backend/skills/examples/excel-xlsx/SKILL.md b/libs/hexagent_demo/backend/skills/examples/excel-xlsx/SKILL.md deleted file mode 100644 index 9fef55e8..00000000 --- a/libs/hexagent_demo/backend/skills/examples/excel-xlsx/SKILL.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -name: excel-xlsx -version: 1.0.2 -homepage: https://clawic.com/skills/excel-xlsx -description: "Create, inspect, and edit Microsoft Excel workbooks and XLSX files with reliable formulas, dates, types, formatting, recalculation, and template preservation. Use when (1) the task is about Excel, `.xlsx`, `.xlsm`, `.xls`, `.csv`, or `.tsv`; (2) formulas, formatting, workbook structure, or compatibility matter; (3) the file must stay reliable after edits." -changelog: Tightened formula anchoring, recalculation, and model traceability after a stricter external spreadsheet audit. -metadata: {"clawdbot":{"emoji":"📗","requires":{"bins":[]},"os":["linux","darwin","win32"]}} ---- - -## When to Use - -Use when the main artifact is a Microsoft Excel workbook or spreadsheet file, especially when formulas, dates, formatting, merged cells, workbook structure, or cross-platform behavior matter. - -## Core Rules - -### 1. Choose the workflow by job, not by habit - -- Use `pandas` for analysis, reshaping, and CSV-like tasks. -- Use `openpyxl` when formulas, styles, sheets, comments, merged cells, or workbook preservation matter. -- Treat CSV as plain data exchange, not as an Excel feature-complete format. -- Reading values, preserving a live workbook, and building a model from scratch are different spreadsheet jobs. - -### 2. Dates are serial numbers with legacy quirks - -- Excel stores dates as serial numbers, not real date objects. -- The 1900 date system includes the false leap-day bug, and some workbooks use the 1904 system. -- Time is fractional day data, so formatting and conversion both matter. -- Date correctness is not enough if the number format still displays the wrong thing to the user. - -### 3. Keep calculations in Excel when the workbook should stay live - -- Write formulas into cells instead of hardcoding derived results from Python. -- Use references to assumption cells instead of magic numbers inside formulas. -- Cached formula values can be stale, so do not trust them blindly after edits. -- Check copied formulas for wrong ranges, wrong sheets, and silent off-by-one drift before delivery. -- Absolute and relative references are part of the logic, so copied formulas can be wrong even when they still "work". -- Test new formulas on a few representative cells before filling them across a whole block. 
-- Verify denominators, named ranges, and precedent cells before shipping formulas that depend on them. -- A workbook should ship with zero formula errors, not with known `#REF!`, `#DIV/0!`, `#VALUE!`, `#NAME?`, or circular-reference fallout left for the user to fix. -- For model-style work, document non-obvious hardcodes, assumptions, or source inputs in comments or nearby notes. - -### 4. Protect data types before Excel mangles them - -- Long identifiers, phone numbers, ZIP codes, and leading-zero values should usually be stored as text. -- Excel silently truncates numeric precision past 15 digits. -- Mixed text-number columns need explicit handling on read and on write. -- Scientific notation, auto-parsed dates, and stripped leading zeros are common corruption, not cosmetic issues. - -### 5. Preserve workbook structure before changing content - -- Existing templates override generic styling advice. -- Only the top-left cell of a merged range stores the value. -- Hidden rows, hidden columns, named ranges, and external references can still affect formulas and outputs. -- Shared strings, defined names, and sheet-level conventions can matter even when the visible cells look simple. -- Match styles for newly filled cells instead of quietly introducing a new visual system. -- If the workbook is a template, preserve sheet order, widths, freezes, filters, print settings, validations, and visual conventions unless the task explicitly changes them. -- Conditional formatting, filters, print areas, and data validation often carry business meaning even when users only mention the numbers. -- If there is no existing style guide and the file is a model, keep editable inputs visually distinguishable from formulas, but never override an established template to force a generic house style. - -### 6. Recalculate and review before delivery - -- Formula strings alone are not enough if the recipient needs current values. -- `openpyxl` preserves formulas but does not calculate them. -- Verify no `#REF!`, `#DIV/0!`, `#VALUE!`, `#NAME?`, or circular-reference fallout remains. -- If layout matters, render or visually review the workbook before calling it finished. -- Be careful with read modes: opening a workbook for values only and then saving can flatten formulas into static values. -- If assumptions or hardcoded overrides must stay, make them obvious enough that the next editor can audit the workbook. - -### 7. Scale the workflow to the file size - -- Large workbooks can fail for boring reasons: memory spikes, padded empty rows, and slow full-sheet reads. -- Use streaming or chunked reads when the file is big enough that loading everything at once becomes fragile. -- Large-file workflows also need narrower reads, explicit dtypes, and sheet targeting to avoid accidental damage. - -## Common Traps - -- Type inference on read can leave numbers as text or convert IDs into damaged numeric values. -- Column indexing varies across tools, so off-by-one mistakes are common in generated formulas. -- Newlines in cells need wrapping to display correctly. -- External references break easily when source files move. -- Password protection in old Excel workflows is not serious security. -- `.xlsm` can contain macros, and `.xls` remains a tighter legacy format. -- Large files may need streaming reads or more careful memory handling. -- Google Sheets and LibreOffice can reinterpret dates, formulas, or styling differently from Excel. 
-- Dynamic array or newer Excel functions like `FILTER`, `XLOOKUP`, `SORT`, or `SEQUENCE` may fail or degrade in older viewers.
-- A workbook can look fine while still carrying stale cached values from a prior recalculation.
-- Saving the wrong workbook view can replace formulas with cached values and quietly destroy a live model.
-- Copying formulas without checking relative references can push one bad range across an entire block.
-- Hidden sheets, named ranges, validations, and merged areas often keep business logic that is invisible in a quick skim.
-- A workbook can appear numerically correct while still failing because filters, conditional formats, print settings, or data validation were stripped.
-- A workbook can be numerically correct and still fail visually because wrapped text, clipped labels, or narrow columns were never reviewed.
-
-## Related Skills
-Install with `clawhub install <slug>` if the user confirms:
-- `csv` — Plain-text tabular import and export workflows.
-- `data` — General data handling patterns before spreadsheet output.
-- `data-analysis` — Higher-level analysis that can feed workbook deliverables.
-
-## Feedback
-
-- If useful: `clawhub star excel-xlsx`
-- Stay updated: `clawhub sync`
diff --git a/libs/hexagent_demo/backend/skills/examples/excel-xlsx/_meta.json b/libs/hexagent_demo/backend/skills/examples/excel-xlsx/_meta.json
deleted file mode 100644
index de7c08cd..00000000
--- a/libs/hexagent_demo/backend/skills/examples/excel-xlsx/_meta.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "ownerId": "kn73vp5rarc3b14rc7wjcw8f8580t5d1",
-  "slug": "excel-xlsx",
-  "version": "1.0.2",
-  "publishedAt": 1773243166499
-}
diff --git a/libs/hexagent_demo/backend/skills/examples/find-skills/.clawhub/origin.json b/libs/hexagent_demo/backend/skills/examples/find-skills/.clawhub/origin.json
deleted file mode 100644
index aa95f834..00000000
--- a/libs/hexagent_demo/backend/skills/examples/find-skills/.clawhub/origin.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "version": 1,
-  "registry": "https://clawhub.ai",
-  "slug": "find-skills",
-  "installedVersion": "0.1.0",
-  "installedAt": 1773632645412
-}
diff --git a/libs/hexagent_demo/backend/skills/examples/find-skills/SKILL.md b/libs/hexagent_demo/backend/skills/examples/find-skills/SKILL.md
deleted file mode 100644
index 0cbb839a..00000000
--- a/libs/hexagent_demo/backend/skills/examples/find-skills/SKILL.md
+++ /dev/null
@@ -1,77 +0,0 @@
----
-name: find-skills
-description: Highest-priority skill discovery flow. MUST trigger when users ask to find or install skills (e.g. 技能, 找技能, find-skill, find-skills, install skill). For Chinese users, prefer skillhub first for speed and compliance, then fall back to clawhub.
-metadata:
-  author: openclaw
-  url: https://github.com/openclaw/openclaw/tree/main/skills
----
-
-# Find Skills
-
-This skill helps discover, compare, and install skills.
-
-## Priority Rules (Mandatory)
-
-1. This skill is the highest priority for skill discovery and install intents.
-2. If user intent includes "技能", "找技能", "find-skill", "find-skills", "install skill", or "有没有这个功能的 skill", you MUST use this skill first.
-3. Do not skip directly to generic coding or answering when skill discovery is requested.
-
-## Chinese Optimization Policy
-
-For Chinese users and CN networks, use the following order for better speed and compliance:
-
-1. `skillhub` (CN-optimized, preferred)
-2. `clawhub` (fallback)
-
-If the primary source has no match or its command is unavailable, fall back to the next source and state that fallback clearly.
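-
-As a minimal sketch, this fallback order can be expressed in one line (assuming both CLIs exit non-zero when they are unavailable or find no match; `QUERY` is a placeholder for the user's search terms):
-
-```bash
-# Prefer skillhub (CN-optimized), then fall back to clawhub if it fails or finds nothing.
-skillhub search "$QUERY" || clawhub search "$QUERY"
-```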
-
-## Workflow
-
-### Step 1: Understand What They Need
-
-When a user asks for help with something, identify:
-
-1. The domain (e.g., React, testing, design, deployment)
-2. The specific task (e.g., writing tests, creating animations, reviewing PRs)
-3. Whether this is a common enough task that a skill likely exists
-
-### Step 2: Search for Skills
-
-Run search in this order:
-
-```bash
-skillhub search [query]
-```
-
-If `skillhub` is unavailable or returns no match, fall back to:
-
-```bash
-clawhub search [query]
-```
-
-### Step 3: Present Options to the User
-
-When you find relevant skills, present them to the user with:
-
-1. The skill name and what it does
-2. The source used (`skillhub` / `clawhub`)
-3. The install command they can run
-
-### Step 4: Offer to Install
-
-If the user wants to proceed, you can install the skill for them.
-
-Preferred install order:
-
-1. Try `skillhub install <slug>` when the result comes from `skillhub`.
-2. If no `skillhub` candidate exists, use `clawhub install <slug>`.
-
-Before installing, summarize the source, version, and notable risk signals.
-
-## When No Skills Are Found
-
-If no relevant skills exist:
-
-1. Acknowledge that no existing skill was found
-2. Offer to help with the task directly using your general capabilities
-3. Suggest creating a custom local skill in the workspace if this is a recurring need
diff --git a/libs/hexagent_demo/backend/skills/examples/find-skills/_meta.json b/libs/hexagent_demo/backend/skills/examples/find-skills/_meta.json
deleted file mode 100644
index 90d2ac88..00000000
--- a/libs/hexagent_demo/backend/skills/examples/find-skills/_meta.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "ownerId": "kn77ajmmqw3cgnc3ay1x3e0ccd805hsw",
-  "slug": "find-skills",
-  "version": "0.1.0",
-  "publishedAt": 1769698710765
-}
diff --git a/libs/hexagent_demo/backend/skills/examples/frontend-dev/SKILL.md b/libs/hexagent_demo/backend/skills/examples/frontend-dev/SKILL.md
deleted file mode 100644
index 88569725..00000000
--- a/libs/hexagent_demo/backend/skills/examples/frontend-dev/SKILL.md
+++ /dev/null
@@ -1,567 +0,0 @@
----
-name: frontend-dev
-description: |
-  Full-stack frontend development combining premium UI design, cinematic animations,
-  AI-generated media assets, persuasive copywriting, and visual art. Builds complete,
-  visually striking web pages with real media, advanced motion, and compelling copy.
-  Use when: building landing pages, marketing sites, product pages, dashboards,
-  generating media assets (image/video/audio/music), writing conversion copy,
-  creating generative art, or implementing cinematic scroll animations.
-license: MIT
-metadata:
-  version: "1.0.0"
-  category: frontend
-  sources:
-    - Framer Motion documentation
-    - GSAP / GreenSock documentation
-    - Three.js documentation
-    - Tailwind CSS documentation
-    - React / Next.js documentation
-    - AIDA Framework (Elmo Lewis)
-    - p5.js documentation
----
-
-# Frontend Studio
-
-Build complete, production-ready frontend pages by orchestrating 5 specialized capabilities: design engineering, motion systems, AI-generated assets, persuasive copy, and generative art.
-
-## Invocation
-
-```
-/frontend-dev
-```
-
-The user provides their request as natural language (e.g. "build a landing page for a music streaming app").
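-
-For instance, a complete invocation using the example request above might look like:
-
-```
-/frontend-dev build a landing page for a music streaming app
-```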
- -## Skill Structure - -``` -frontend-dev/ -├── SKILL.md # Core skill (this file) -├── scripts/ # Asset generation scripts -│ ├── minimax_tts.py # Text-to-speech -│ ├── minimax_music.py # Music generation -│ ├── minimax_video.py # Video generation (async) -│ └── minimax_image.py # Image generation -├── references/ # Detailed guides (read as needed) -│ ├── minimax-cli-reference.md # CLI flags quick reference -│ ├── asset-prompt-guide.md # Asset prompt engineering rules -│ ├── minimax-tts-guide.md # TTS usage & voices -│ ├── minimax-music-guide.md # Music prompts & lyrics format -│ ├── minimax-video-guide.md # Camera commands & models -│ ├── minimax-image-guide.md # Ratios & batch generation -│ ├── minimax-voice-catalog.md # All voice IDs -│ ├── motion-recipes.md # Animation code snippets -│ ├── env-setup.md # Environment setup -│ └── troubleshooting.md # Common issues -├── templates/ # Visual art templates -│ ├── viewer.html # p5.js interactive art base -│ └── generator_template.js # p5.js code reference -└── canvas-fonts/ # Static art fonts (TTF + licenses) -``` - -## Project Structure - -### Assets (Universal) - -All frameworks use the same asset organization: - -``` -assets/ -├── images/ -│ ├── hero-landing-1710xxx.webp -│ ├── icon-feature-01.webp -│ └── bg-pattern.svg -├── videos/ -│ ├── hero-bg-1710xxx.mp4 -│ └── demo-preview.mp4 -└── audio/ - ├── bgm-ambient-1710xxx.mp3 - └── tts-intro-1710xxx.mp3 -``` - -**Asset naming:** `{type}-{descriptor}-{timestamp}.{ext}` - -### By Framework - -| Framework | Asset Location | Component Location | -|-----------|---------------|-------------------| -| **Pure HTML** | `./assets/` | N/A (inline or `./js/`) | -| **React/Next.js** | `public/assets/` | `src/components/` | -| **Vue/Nuxt** | `public/assets/` | `src/components/` | -| **Svelte/SvelteKit** | `static/assets/` | `src/lib/components/` | -| **Astro** | `public/assets/` | `src/components/` | - -### Pure HTML - -``` -project/ -├── index.html -├── assets/ -│ ├── images/ -│ ├── videos/ -│ └── audio/ -├── css/ -│ └── styles.css -└── js/ - └── main.js # Animations (GSAP/vanilla) -``` - -### React / Next.js - -``` -project/ -├── public/assets/ # Static assets -├── src/ -│ ├── components/ -│ │ ├── ui/ # Button, Card, Input -│ │ ├── sections/ # Hero, Features, CTA -│ │ └── motion/ # RevealSection, StaggerGrid -│ ├── lib/ -│ ├── styles/ -│ └── app/ # Pages -└── package.json -``` - -### Vue / Nuxt - -``` -project/ -├── public/assets/ -├── src/ # or root for Nuxt -│ ├── components/ -│ │ ├── ui/ -│ │ ├── sections/ -│ │ └── motion/ -│ ├── composables/ # Shared logic -│ ├── pages/ -│ └── assets/ # Processed assets (optional) -└── package.json -``` - -### Astro - -``` -project/ -├── public/assets/ -├── src/ -│ ├── components/ # .astro, .tsx, .vue, .svelte -│ ├── layouts/ -│ ├── pages/ -│ └── styles/ -└── package.json -``` - -**Component naming:** PascalCase (`HeroSection.tsx`, `HeroSection.vue`, `HeroSection.astro`) - ---- - -## Compliance - -**All rules in this skill are mandatory. Violating any rule is a blocking error — fix before proceeding or delivering.** - ---- - -## Workflow -### Phase 1: Design Architecture -1. Analyze the request — determine page type and context -2. Set design dials based on page type -3. Plan layout sections and identify asset needs - -### Phase 2: Motion Architecture -1. Select animation tools per section (see Tool Selection Matrix) -2. Plan motion sequences following performance guardrails - -### Phase 3: Asset Generation -Generate all image/video/audio assets using `scripts/`. 
NEVER use placeholder URLs (unsplash, picsum, placeholder.com, via.placeholder, placehold.co, etc.) or external URLs. - -1. Parse asset requirements (type, style, spec, usage) -2. Craft optimized prompts, show to user, confirm before generating -3. Execute via scripts, save to project — do NOT proceed to Phase 5 until all assets are saved locally - -### Phase 4: Copywriting & Content -Follow copywriting frameworks (AIDA, PAS, FAB) to craft all text content. Do NOT use "Lorem ipsum" — write real copy. - -### Phase 5: Build UI -Scaffold the project and build each section following Design and Motion rules. Integrate generated assets and copy. All ``, `