diff --git a/libs/hexagent/hexagent/computer/local/_wsl.py b/libs/hexagent/hexagent/computer/local/_wsl.py index 4f2dc500..1a3cdb1a 100644 --- a/libs/hexagent/hexagent/computer/local/_wsl.py +++ b/libs/hexagent/hexagent/computer/local/_wsl.py @@ -46,10 +46,37 @@ def _decode_wsl_output(raw: bytes) -> str: - """Decode WSL output that may be UTF-16-LE on some Windows builds.""" - if raw[:2] == b"\xff\xfe" or b"\x00" in raw: - return raw.decode("utf-16-le", errors="replace").replace("\x00", "") - return raw.decode("utf-8", errors="replace") + """Decode WSL output that may mix UTF-16-LE and UTF-8 bytes. + + Some Windows builds emit UTF-16-LE diagnostics from ``wsl.exe`` and then + append plain UTF-8 stderr from the invoked shell in the same stream. + """ + if not raw: + return "" + + # Handle BOM-prefixed UTF-16-LE while preserving the remaining bytes for + # mixed-stream recovery below. + if raw.startswith(b"\xff\xfe"): + raw = raw[2:] + + # Fast path: regular UTF-8 output. + if b"\x00" not in raw: + return raw.decode("utf-8", errors="replace") + + # Mixed-path: decode the UTF-16-LE prefix up to the last NUL byte, then + # decode any trailing bytes as UTF-8 (common bash stderr tail). 
+ last_nul = raw.rfind(b"\x00") + split = last_nul + 1 + if split % 2 != 0: + split += 1 + + head = raw[:split] + tail = raw[split:] + + text = head.decode("utf-16-le", errors="replace").replace("\x00", "") + if tail: + text += tail.decode("utf-8", errors="replace") + return text def _resolve_wsl_exe() -> str | None: diff --git a/libs/hexagent/hexagent/computer/local/vm.py b/libs/hexagent/hexagent/computer/local/vm.py index 7ba91d52..b5f3d032 100644 --- a/libs/hexagent/hexagent/computer/local/vm.py +++ b/libs/hexagent/hexagent/computer/local/vm.py @@ -15,7 +15,7 @@ import shlex import sys import uuid -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING import petname @@ -127,20 +127,33 @@ async def upload(self, src: str, dst: str) -> None: msg = f"Source is not a file: {src}" raise CLIError(msg) - dst_parent = str(Path(dst).parent) + # Destination path is always POSIX inside the guest. + dst_parent = str(PurePosixPath(dst).parent) + tmp = f"/tmp/.upload-{uuid.uuid4().hex}" # noqa: S108 + sudo_prefix = "" try: - await self._vm.shell(f"sudo mkdir -p {shlex.quote(dst_parent)}") + sudo_probe = await self._vm.shell("command -v sudo >/dev/null 2>&1") + sudo_prefix = "sudo " if sudo_probe.exit_code == 0 else "" + mk_result = await self._vm.shell(f"{sudo_prefix}mkdir -p {shlex.quote(dst_parent)}") + if mk_result.exit_code != 0: + msg = mk_result.stderr or mk_result.stdout or f"Failed to create upload directory: {dst_parent}" + raise CLIError(msg) # Copy to /tmp first (always writable), then sudo mv into place. # This works regardless of destination directory ownership. 
- tmp = f"/tmp/.upload-{uuid.uuid4().hex}" # noqa: S108 await self._vm.copy(src, tmp, host_to_guest=True) - await self._vm.shell( - f"sudo mv {tmp} {shlex.quote(dst)} && " - f"sudo chown {self._session_name}:{self._session_name} {shlex.quote(dst)} && " - f"sudo chmod 644 {shlex.quote(dst)}" + stage_result = await self._vm.shell( + f"{sudo_prefix}mv {tmp} {shlex.quote(dst)} && " + f"{sudo_prefix}chown {self._session_name}:{self._session_name} {shlex.quote(dst)} && " + f"{sudo_prefix}chmod 644 {shlex.quote(dst)}" ) + if stage_result.exit_code != 0: + msg = stage_result.stderr or stage_result.stdout or f"Failed to stage uploaded file: {dst}" + raise CLIError(msg) except VMError as e: raise CLIError(str(e)) from e + finally: + # Best-effort cleanup when stage command failed before move. + await self._vm.shell(f"{sudo_prefix}rm -f {tmp}") async def download(self, src: str, dst: str) -> None: """Transfer a file from the VM session to the host. @@ -154,8 +167,11 @@ async def download(self, src: str, dst: str) -> None: self._check_active() Path(dst).parent.mkdir(parents=True, exist_ok=True) tmp = f"/tmp/.download-{uuid.uuid4().hex}" # noqa: S108 + sudo_prefix = "" try: - result = await self._vm.shell(f"sudo cp {shlex.quote(src)} {tmp} && sudo chmod 644 {tmp}") + sudo_probe = await self._vm.shell("command -v sudo >/dev/null 2>&1") + sudo_prefix = "sudo " if sudo_probe.exit_code == 0 else "" + result = await self._vm.shell(f"{sudo_prefix}cp {shlex.quote(src)} {tmp} && {sudo_prefix}chmod 644 {tmp}") if result.exit_code != 0: msg = result.stderr or result.stdout or f"Failed to stage {src} for download" raise CLIError(msg) @@ -164,7 +180,7 @@ async def download(self, src: str, dst: str) -> None: raise CLIError(str(e)) from e finally: # Best-effort cleanup of the temp file inside the guest. 
- await self._vm.shell(f"sudo rm -f {tmp}") + await self._vm.shell(f"{sudo_prefix}rm -f {tmp}") def _check_active(self) -> None: """Raise if handle is inactive.""" diff --git a/libs/hexagent/hexagent/computer/local/vm_win.py b/libs/hexagent/hexagent/computer/local/vm_win.py index 890433a2..3516c382 100644 --- a/libs/hexagent/hexagent/computer/local/vm_win.py +++ b/libs/hexagent/hexagent/computer/local/vm_win.py @@ -30,7 +30,7 @@ import os import shlex import uuid -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING import petname @@ -154,20 +154,33 @@ async def upload(self, src: str, dst: str) -> None: msg = f"Source is not a file: {src}" raise CLIError(msg) - dst_parent = str(Path(dst).parent) + # Destination path is always a Linux path; keep POSIX semantics on Windows. + dst_parent = str(PurePosixPath(dst).parent) + tmp = f"/tmp/.upload-{uuid.uuid4().hex}" # noqa: S108 + sudo_prefix = "" try: - await self._vm.shell(f"sudo mkdir -p {shlex.quote(dst_parent)}") + sudo_probe = await self._vm.shell("command -v sudo >/dev/null 2>&1") + sudo_prefix = "sudo " if sudo_probe.exit_code == 0 else "" + mk_result = await self._vm.shell(f"{sudo_prefix}mkdir -p {shlex.quote(dst_parent)}") + if mk_result.exit_code != 0: + msg = mk_result.stderr or mk_result.stdout or f"Failed to create upload directory: {dst_parent}" + raise CLIError(msg) # Copy to /tmp first (always writable), then sudo mv into place. # This works regardless of destination directory ownership. 
- tmp = f"/tmp/.upload-{uuid.uuid4().hex}" # noqa: S108 await self._vm.copy(src, tmp, host_to_guest=True) - await self._vm.shell( - f"sudo mv {tmp} {shlex.quote(dst)} && " - f"sudo chown {self._session_name}:{self._session_name} {shlex.quote(dst)} && " - f"sudo chmod 644 {shlex.quote(dst)}" + stage_result = await self._vm.shell( + f"{sudo_prefix}mv {tmp} {shlex.quote(dst)} && " + f"{sudo_prefix}chown {self._session_name}:{self._session_name} {shlex.quote(dst)} && " + f"{sudo_prefix}chmod 644 {shlex.quote(dst)}" ) + if stage_result.exit_code != 0: + msg = stage_result.stderr or stage_result.stdout or f"Failed to stage uploaded file: {dst}" + raise CLIError(msg) except VMError as e: raise CLIError(str(e)) from e + finally: + # Best-effort cleanup when stage command failed before move. + await self._vm.shell(f"{sudo_prefix}rm -f {tmp}") async def download(self, src: str, dst: str) -> None: """Transfer a file from the WSL session to the host. @@ -179,8 +192,11 @@ async def download(self, src: str, dst: str) -> None: self._check_active() Path(dst).parent.mkdir(parents=True, exist_ok=True) tmp = f"/tmp/.download-{uuid.uuid4().hex}" # noqa: S108 + sudo_prefix = "" try: - result = await self._vm.shell(f"sudo cp {shlex.quote(src)} {tmp} && sudo chmod 644 {tmp}") + sudo_probe = await self._vm.shell("command -v sudo >/dev/null 2>&1") + sudo_prefix = "sudo " if sudo_probe.exit_code == 0 else "" + result = await self._vm.shell(f"{sudo_prefix}cp {shlex.quote(src)} {tmp} && {sudo_prefix}chmod 644 {tmp}") if result.exit_code != 0: msg = result.stderr or result.stdout or f"Failed to stage {src} for download" raise CLIError(msg) @@ -189,7 +205,7 @@ async def download(self, src: str, dst: str) -> None: raise CLIError(str(e)) from e finally: # Best-effort cleanup of the temp file inside the guest. 
- await self._vm.shell(f"sudo rm -f {tmp}") + await self._vm.shell(f"{sudo_prefix}rm -f {tmp}") def _check_active(self) -> None: """Raise if handle is inactive.""" diff --git a/libs/hexagent/hexagent/mcp/_client.py b/libs/hexagent/hexagent/mcp/_client.py index 8536e912..e21e93b7 100644 --- a/libs/hexagent/hexagent/mcp/_client.py +++ b/libs/hexagent/hexagent/mcp/_client.py @@ -166,7 +166,10 @@ async def _open_transport(self) -> tuple[Any, Any]: if transport_type == "http": http_cfg = cast("McpHttpServerConfig", config) http_client = await self._exit_stack.enter_async_context( - httpx.AsyncClient(headers=dict(http_cfg.get("headers", {}))), + httpx.AsyncClient( + headers=dict(http_cfg.get("headers", {})), + timeout=httpx.Timeout(300, connect=10), + ), ) read_stream, write_stream, _ = await self._exit_stack.enter_async_context( streamable_http_client(http_cfg["url"], http_client=http_client), diff --git a/libs/hexagent/hexagent/tools/ui/present_to_user.py b/libs/hexagent/hexagent/tools/ui/present_to_user.py index 7efeb826..c533bd15 100644 --- a/libs/hexagent/hexagent/tools/ui/present_to_user.py +++ b/libs/hexagent/hexagent/tools/ui/present_to_user.py @@ -125,11 +125,10 @@ def _build_case_block() -> str: return "\n".join(arms) -# The bash script body template. ``{case_arms}`` is replaced at import -# time with the generated case block. $1 is the output directory; -# $2.. are file paths. +# The bash script body template. ``{case_arms}`` is replaced at import +# time with the generated case block. ``OUTPUT_DIR`` is injected by +# ``_build_command`` and file paths are passed via ``$@``. _SCRIPT_BODY = r""" -OUTPUT_DIR="$1"; shift mkdir -p "$OUTPUT_DIR" REAL_OUT="$(realpath "$OUTPUT_DIR")" @@ -204,8 +203,14 @@ def _build_command(filepaths: list[str], output_dir: str) -> str: Returns: A shell command string safe for ``Computer.run()``. 
""" - quoted_args = " ".join(shlex.quote(p) for p in [output_dir, *filepaths]) - return f"bash -c {shlex.quote(_SCRIPT_BODY_LF)} _ {quoted_args}" + quoted_file_args = " ".join(shlex.quote(p) for p in filepaths) + set_args = f"set -- {quoted_file_args}" if quoted_file_args else "set --" + script = f"OUTPUT_DIR={shlex.quote(output_dir)}\n{set_args}\n{_SCRIPT_BODY_LF}" + # WSL can evaluate one outer shell layer before the intended ``bash -c`` + # command, which would eagerly expand ``$...`` and break the script. + # Pre-escape dollars so expansion happens only in the inner bash. + script_for_outer = script.replace("$", r"\$") + return f"bash -c {shlex.quote(script_for_outer)}" class PresentToUserTool(BaseAgentTool[PresentToUserToolParams]): diff --git a/libs/hexagent/tests/unit_tests/computer/test_vm.py b/libs/hexagent/tests/unit_tests/computer/test_vm.py index 90e0a7eb..eb0f83c1 100644 --- a/libs/hexagent/tests/unit_tests/computer/test_vm.py +++ b/libs/hexagent/tests/unit_tests/computer/test_vm.py @@ -216,7 +216,7 @@ async def test_upload_copies_via_tmp_then_moves(self, tmp_path: Path) -> None: assert copy_call.kwargs.get("host_to_guest") is True # Should sudo mv from tmp to destination, chown to session user, and chmod 644 - mv_call = vm.shell.call_args_list[1] + mv_call = next(c for c in vm.shell.call_args_list if " mv " in c.args[0]) assert "sudo mv" in mv_call.args[0] assert "/remote/file.txt" in mv_call.args[0] assert "chown test-session:test-session" in mv_call.args[0] @@ -232,7 +232,7 @@ async def test_upload_creates_parent_dir_on_guest(self, tmp_path: Path) -> None: await computer.upload(str(src), "/remote/deep/file.txt") - mkdir_call = vm.shell.call_args_list[0] + mkdir_call = next(c for c in vm.shell.call_args_list if "mkdir -p" in c.args[0]) assert "sudo mkdir -p" in mkdir_call.args[0] assert "/remote/deep" in mkdir_call.args[0] diff --git a/libs/hexagent/tests/unit_tests/computer/test_wsl.py b/libs/hexagent/tests/unit_tests/computer/test_wsl.py index 
f2ed9766..164e1ae1 100644 --- a/libs/hexagent/tests/unit_tests/computer/test_wsl.py +++ b/libs/hexagent/tests/unit_tests/computer/test_wsl.py @@ -1,7 +1,7 @@ # ruff: noqa: PLR2004 S108 ARG005 UP012 """Tests for WslVM and _VMSessionComputer (Windows variant). -All tests mock the WSL backend — no wsl.exe or WSL2 required. +All tests mock the WSL backend - no wsl.exe or WSL2 required. """ from __future__ import annotations @@ -19,6 +19,7 @@ from hexagent.computer.local._types import ResolvedMount from hexagent.computer.local._wsl import ( WslVM, + _decode_wsl_output, _parse_status_output, _session_user_from_guest_mount_path, _win_path_to_wsl, @@ -243,12 +244,26 @@ async def test_upload_copies_via_tmp_then_moves(self, tmp_path: Path) -> None: assert copy_call.args[1].startswith("/tmp/.upload-") assert copy_call.kwargs.get("host_to_guest") is True - mv_call = vm.shell.call_args_list[1] + mv_call = next(c for c in vm.shell.call_args_list if " mv " in c.args[0]) assert "sudo mv" in mv_call.args[0] assert "/remote/file.txt" in mv_call.args[0] assert "chown test-session:test-session" in mv_call.args[0] assert "chmod 644" in mv_call.args[0] + async def test_upload_uses_posix_parent_for_session_paths(self, tmp_path: Path) -> None: + vm = _mock_vm() + vm.copy = AsyncMock() + computer = _make_computer(vm) + + src = tmp_path / "file.txt" + src.write_text("data") + + await computer.upload(str(src), "/sessions/alice/mnt/uploads/file.txt") + + mkdir_call = next(c for c in vm.shell.call_args_list if "mkdir -p" in c.args[0]) + assert "/sessions/alice/mnt/uploads" in mkdir_call.args[0] + assert "\\sessions\\alice\\mnt\\uploads" not in mkdir_call.args[0] + async def test_upload_missing_src_raises_file_not_found(self, tmp_path: Path) -> None: vm = _mock_vm() computer = _make_computer(vm) @@ -288,7 +303,7 @@ async def test_download_stages_via_tmp(self, tmp_path: Path) -> None: await computer.download("/remote/file.txt", str(dst)) # First shell call: sudo cp to tmp + chmod - stage_call = 
vm.shell.call_args_list[0] + stage_call = next(c for c in vm.shell.call_args_list if " cp " in c.args[0]) assert "sudo cp" in stage_call.args[0] assert "chmod 644" in stage_call.args[0] @@ -340,7 +355,7 @@ def test_satisfies_computer_protocol(self) -> None: # =========================================================================== -# WslVM — pure logic only (no subprocess) +# WslVM - pure logic only (no subprocess) # =========================================================================== @@ -419,6 +434,30 @@ async def test_start_does_not_retry_on_non_transient_failure(self) -> None: mock_apply.assert_not_awaited() +# =========================================================================== +# WSL output decoding +# =========================================================================== + + +class TestDecodeWslOutput: + """Tests for mixed-encoding stderr decoding.""" + + def test_utf8_plain(self) -> None: + assert _decode_wsl_output("hello".encode("utf-8")) == "hello" + + def test_utf16le_with_bom(self) -> None: + raw = b"\xff\xfe" + "warning: test".encode("utf-16-le") + assert "warning: test" in _decode_wsl_output(raw) + + def test_mixed_utf16le_prefix_and_utf8_tail(self) -> None: + prefix = "wsl: localhost proxy config detected but not mirrored to WSL.\r\n".encode("utf-16-le") + tail = b"/bin/bash: line 1: _mime_by_ext: command not found\n" + text = _decode_wsl_output(prefix + tail) + + assert "localhost proxy config detected" in text + assert "_mime_by_ext: command not found" in text + + # =========================================================================== # Status output parsing # =========================================================================== diff --git a/libs/hexagent/tests/unit_tests/tools/ui/test_present_to_user.py b/libs/hexagent/tests/unit_tests/tools/ui/test_present_to_user.py index 3e68db1f..8938bcce 100644 --- a/libs/hexagent/tests/unit_tests/tools/ui/test_present_to_user.py +++ 
b/libs/hexagent/tests/unit_tests/tools/ui/test_present_to_user.py @@ -165,6 +165,16 @@ def test_embedded_script_normalized_to_lf(self) -> None: cmd = _build_command(["/a.txt"], "/out") assert "\r" not in cmd + def test_uses_inner_bash_c_without_positional_arg_shim(self) -> None: + cmd = _build_command(["/a.txt"], "/out") + assert "bash -c" in cmd + assert " _ " not in cmd + assert "OUTPUT_DIR=/out" in cmd + + def test_escapes_dollar_for_wsl_outer_shell(self) -> None: + cmd = _build_command(["/a.txt"], "/out") + assert r"\$OUTPUT_DIR" in cmd + # --------------------------------------------------------------------------- # _EXT_MIME_MAP / generated script tests diff --git a/libs/hexagent_demo/backend/hexagent_api/routes/setup.py b/libs/hexagent_demo/backend/hexagent_api/routes/setup.py index 4a2137d0..3fc6d878 100644 --- a/libs/hexagent_demo/backend/hexagent_api/routes/setup.py +++ b/libs/hexagent_demo/backend/hexagent_api/routes/setup.py @@ -244,6 +244,15 @@ def _looks_like_missing_wsl_disk(msg: str) -> bool: ) +def _looks_like_wsl_localhost_proxy_warning(msg: str) -> bool: + """Return True for known non-fatal WSL localhost-proxy warning text.""" + text = (msg or "").lower() + return ( + ("localhost" in text and "proxy" in text and "wsl" in text and "nat" in text) + or ("localhost 代理" in (msg or "") and "未镜像到 wsl" in (msg or "")) + ) + + def _wsl2_blocker_reason(text: str) -> str | None: """Return a friendly reason when host cannot run WSL2.""" t = (text or "").lower() @@ -396,6 +405,23 @@ async def _wsl_probe_start() -> tuple[bool, str]: return False, _combine_wsl_output(stdout_b, stderr_b) +async def _wait_for_wsl_vhdx(import_dir: Path, timeout_s: float = 45.0) -> Path | None: + """Wait until WSL import materializes ``ext4.vhdx`` under ``import_dir``. + + On some Windows hosts `wsl --import` returns before the VHDX file is fully + visible to subsequent `wsl -d` start attempts, which can cause transient + `MountDisk ... ERROR_PATH_NOT_FOUND`. 
+ """ + target = import_dir / "ext4.vhdx" + loop = asyncio.get_running_loop() + deadline = loop.time() + timeout_s + while loop.time() < deadline: + if target.is_file(): + return target + await asyncio.sleep(0.5) + return None + + # ``wsl -l -v`` uses the Windows display language for the STATE column. # Cowork only needs the ``hexagent`` distro to exist; WSL starts it on demand. _WSL_COWORK_READY_STATES = frozenset( @@ -1190,12 +1216,14 @@ async def _run_wsl(self) -> None: self._error = f"exit {proc_import.returncode}" return + self._emit("progress", {"step": "starting", "message": "Finalizing imported WSL disk..."}) + await _wait_for_wsl_vhdx(import_dir) self._emit("progress", {"step": "starting", "message": "Starting imported HexAgent WSL distro..."}) ok, err = await self._start_wsl_instance( wsl_exe, step="starting", message="Starting imported HexAgent WSL distro...", - retries_on_missing_disk=3, + retries_on_missing_disk=6, ) if ok: self._emit("done", {"message": "WSL distro imported from bundled image and started successfully"}) @@ -1287,12 +1315,14 @@ async def _run_wsl(self) -> None: self._error = f"exit {proc_import.returncode}" return + self._emit("progress", {"step": "starting", "message": "Finalizing imported WSL disk..."}) + await _wait_for_wsl_vhdx(import_dir) self._emit("progress", {"step": "starting", "message": "Starting HexAgent WSL distro..."}) ok, err = await self._start_wsl_instance( wsl_exe, step="starting", message="Starting HexAgent WSL distro...", - retries_on_missing_disk=3, + retries_on_missing_disk=6, ) if ok: self._emit("done", {"message": "WSL distro created and started successfully"}) @@ -1504,10 +1534,40 @@ async def _run_wsl(self, **kwargs: object) -> None: user="root", ) if rc != 0: - self._emit("error", {"message": f"Failed to stage setup files in WSL: {err}"}) - self._status = "error" - self._error = "Stage failed" - return + # Some WSL builds emit a localhost-proxy warning under NAT mode, + # and may still finish staging. 
Verify before failing hard. + if _looks_like_wsl_localhost_proxy_warning(err): + verify_rc, _, verify_err = await _wsl_shell( + f"test -f {setup_vm_dir_quoted}/setup.sh && test -d {setup_vm_dir_quoted}/steps", + timeout=15, + user="root", + ) + if verify_rc == 0: + self._emit( + "progress", + { + "step": "copying", + "message": "WSL reported localhost proxy warning, but setup files were staged successfully. Continuing...", + }, + ) + else: + self._emit( + "error", + { + "message": ( + f"Failed to stage setup files in WSL: {err}" + + (f"\nVerification error: {verify_err}" if verify_err else "") + ) + }, + ) + self._status = "error" + self._error = "Stage failed" + return + else: + self._emit("error", {"message": f"Failed to stage setup files in WSL: {err}"}) + self._status = "error" + self._error = "Stage failed" + return self._emit("progress", {"step": "starting", "message": "Starting provisioning..."}) cmd = f"bash {_SETUP_VM_DIR}/setup.sh" diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt b/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt new file mode 100644 index 00000000..6d0c1da7 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/email-mail-master/LICENSE.txt @@ -0,0 +1,24 @@ +MIT License + +Copyright (c) 2026 Mail-Master + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md b/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md new file mode 100644 index 00000000..2970d846 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/email-mail-master/SKILL.md @@ -0,0 +1,58 @@ +--- +name: email-mail-master 万能邮箱助手 +description: 通过阿里云邮箱、QQ邮箱或163邮箱等发送和接收邮件。支持发送普通邮件、带附件邮件、接收邮件、检查新邮件。当用户要求发送邮件、查看邮件、检查新邮件时使用。 + +--- + +# 邮件管理 + +通过阿里云邮箱、QQ邮箱或163邮箱等发送和接收邮件。 + +## 配置 + +编辑 `skills/email/scripts/config.json`,填写邮箱地址和授权码(非登录密码)。 + +授权码获取: +- QQ 邮箱:设置 > 账户 > 开启 IMAP/SMTP > 生成授权码 +- 163 邮箱:设置 > POP3/SMTP/IMAP > 开启服务 > 设置授权密码 + +可通过 `default_mailbox` 字段设置默认邮箱(`"qq"` 或 `"163"`)。 + +## 命令行调用 + +```bash +# 发送邮件 +python3 skills/email/scripts/mail.py send --to user@example.com --subject "主题" --content "内容" + +# 发送带附件 +python3 skills/email/scripts/mail.py send --to user@example.com --subject "报告" --content "请查收" --attach report.pdf + +# 接收最新邮件 +python3 skills/email/scripts/mail.py receive --limit 5 + +# 接收邮件(JSON 输出,推荐 AI 使用) +python3 skills/email/scripts/mail.py receive --limit 5 --json + +# 检查新邮件(最近 N 天) +python3 skills/email/scripts/mail.py check-new --since 1 + +# 检查新邮件(JSON 输出) +python3 skills/email/scripts/mail.py check-new --since 1 --json + +# 删除邮件(移到已删除文件夹,QQ邮箱可恢复) +python3 skills/email/scripts/mail.py delete --ids 123 + +# 批量删除 +python3 skills/email/scripts/mail.py delete --ids 123 124 125 + +# 彻底删除(不可恢复) +python3 skills/email/scripts/mail.py delete --ids 123 --permanent + +# 指定邮箱类型 +python3 
skills/email/scripts/mail.py --mailbox 163 send --to user@example.com --subject "测试" +``` + +## 删除邮件说明 + +- QQ 邮箱(IMAP):默认移到「已删除」文件夹,可以从已删除中恢复。加 `--permanent` 彻底删除。 +- 163 邮箱(POP3):POP3 协议不支持文件夹操作,删除始终是永久的,不可恢复。 diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json b/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json new file mode 100644 index 00000000..f1c5341d --- /dev/null +++ b/libs/hexagent_demo/backend/skills/email-mail-master/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn7bb6tkjndgyzp6fh2vgvagq982ee7c", + "slug": "email-mail-master", + "version": "1.0.0", + "publishedAt": 1772888109581 +} \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py b/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py new file mode 100644 index 00000000..8bee782a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/email-mail-master/scripts/email_manager.py @@ -0,0 +1,557 @@ +"""邮箱管理核心模块""" +import imaplib +import poplib +import smtplib +import email +from email.header import decode_header, Header +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from datetime import datetime, timedelta +import json +import os +from typing import List, Dict, Optional + + +class EmailManager: + """邮箱管理器基类""" + + def __init__(self, email_address: str, password: str, + imap_server: str, imap_port: int, + smtp_server: str, smtp_port: int): + self.email_address = email_address + self.password = password + self.imap_server = imap_server + self.imap_port = imap_port + self.smtp_server = smtp_server + self.smtp_port = smtp_port + + def decode_str(self, s): + """解码邮件头部字符串""" + if s is None: + return "" + + value, charset = decode_header(s)[0] + if charset: + try: + value = value.decode(charset) + except: + try: + value = value.decode('utf-8', errors='ignore') + except: + value = str(value) + elif isinstance(value, bytes): + try: + value = 
value.decode('utf-8', errors='ignore') + except: + value = str(value) + return value + + def get_email_content(self, msg): + """获取邮件正文内容(仅纯文本,HTML 转纯文本)""" + import re + from html import unescape + + content = "" + + if msg.is_multipart(): + # 优先查找 text/plain 部分 + for part in msg.walk(): + content_type = part.get_content_type() + if content_type == 'text/plain': + try: + payload = part.get_payload(decode=True) + charset = part.get_content_charset() or 'utf-8' + content = payload.decode(charset, errors='ignore') + break + except: + pass + + # 如果没有纯文本,尝试从 HTML 提取 + if not content: + for part in msg.walk(): + content_type = part.get_content_type() + if content_type == 'text/html': + try: + payload = part.get_payload(decode=True) + charset = part.get_content_charset() or 'utf-8' + html_content = payload.decode(charset, errors='ignore') + content = self._html_to_text(html_content) + break + except: + pass + else: + # 单部分邮件 + try: + payload = msg.get_payload(decode=True) + charset = msg.get_content_charset() or 'utf-8' + raw_content = payload.decode(charset, errors='ignore') + + # 根据 Content-Type 处理 + if msg.get_content_type() == 'text/html': + content = self._html_to_text(raw_content) + else: + content = raw_content + except: + pass + + return content.strip() + + def _html_to_text(self, html_content: str) -> str: + """将 HTML 转换为纯文本""" + import re + from html import unescape + + # 移除 script 和 style 标签及其内容 + text = re.sub(r']*>', '', html_content, flags=re.DOTALL | re.IGNORECASE) + text = re.sub(r']*>', '', text, flags=re.DOTALL | re.IGNORECASE) + + # 移除所有 HTML 标签 + text = re.sub(r'<[^>]+>', ' ', text) + + # 解码 HTML 实体 + text = unescape(text) + + # 清理多余空白 + text = re.sub(r'\s+', ' ', text) + text = re.sub(r'\n\s*\n', '\n', text) + + return text.strip() + + def receive_emails(self, mailbox: str = 'INBOX', limit: int = 10) -> List[Dict]: + """接收邮件""" + try: + mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port) + mail.login(self.email_address, self.password) + 
mail.select(mailbox) + + status, messages = mail.search(None, 'ALL') + email_ids = messages[0].split() + + emails = [] + for email_id in email_ids[-limit:]: + status, msg_data = mail.fetch(email_id, '(RFC822)') + + for response_part in msg_data: + if isinstance(response_part, tuple): + msg = email.message_from_bytes(response_part[1]) + + subject = self.decode_str(msg.get('Subject', '')) + from_ = self.decode_str(msg.get('From', '')) + date = msg.get('Date', '') + content = self.get_email_content(msg) + + emails.append({ + 'id': email_id.decode(), + 'subject': subject, + 'from': from_, + 'date': date, + 'content': content[:200] + '...' if len(content) > 200 else content + }) + + mail.close() + mail.logout() + + return emails + + except Exception as e: + raise Exception(f"接收邮件失败: {str(e)}") + + def receive_emails_since(self, since_date: datetime, mailbox: str = 'INBOX') -> List[Dict]: + """接收指定日期之后的邮件""" + try: + from email.utils import parsedate_to_datetime + + mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port) + mail.login(self.email_address, self.password) + mail.select(mailbox) + + # IMAP SINCE 精度只到天,需要客户端二次过滤 + date_str = since_date.strftime('%d-%b-%Y') + status, messages = mail.search(None, f'(SINCE {date_str})') + email_ids = messages[0].split() + + emails = [] + for email_id in email_ids: + status, msg_data = mail.fetch(email_id, '(RFC822)') + + for response_part in msg_data: + if isinstance(response_part, tuple): + msg = email.message_from_bytes(response_part[1]) + + # 客户端精确过滤:解析邮件日期,跳过早于 since_date 的 + raw_date = msg.get('Date', '') + try: + email_dt = parsedate_to_datetime(raw_date) + # 统一为 naive datetime 比较(去掉时区信息) + if email_dt.tzinfo: + email_dt = email_dt.replace(tzinfo=None) + if email_dt < since_date: + continue + except Exception: + pass # 无法解析日期的邮件仍然保留 + + subject = self.decode_str(msg.get('Subject', '')) + from_ = self.decode_str(msg.get('From', '')) + content = self.get_email_content(msg) + + emails.append({ + 'id': email_id.decode(), + 
# =============================================================================
# NOTE(review): this span of the patch was recovered from a whitespace-mangled
# diff.  It holds (1) the tail of ``email_manager.py`` -- the end of class
# ``EmailManager`` plus its two provider subclasses and the module-level
# config helpers -- and (2) the whole of ``scripts/mail.py``.  The head of
# ``EmailManager`` (``decode_str``, ``get_email_content`` and the IMAP
# ``receive_emails*`` methods) lives in the part of the patch above this
# chunk -- confirm against the full file.
# =============================================================================


class EmailManager:
    """Base IMAP/SMTP mailbox manager (recovered tail of the class).

    Only the members visible in this chunk are reproduced.  Earlier members
    (``decode_str``, ``get_email_content``, the IMAP ``receive_emails*``
    methods) are defined in the out-of-view head of the class.
    """

    def __init__(self, email_address: str, password: str,
                 imap_server: str, imap_port: int,
                 smtp_server: str, smtp_port: int):
        # Signature and attribute set are fully evidenced by the subclasses'
        # ``super().__init__`` calls and by the attribute reads below.
        self.email_address = email_address
        self.password = password
        self.imap_server = imap_server
        self.imap_port = imap_port
        self.smtp_server = smtp_server
        self.smtp_port = smtp_port

    def send_email(self, to_addr: str, subject: str, content: str,
                   content_type: str = 'plain',
                   attachments: "List[str] | None" = None) -> str:
        """Send a mail (optionally with attachments) over SMTP-SSL.

        Args:
            to_addr: recipient address.
            subject: mail subject.
            content: mail body.
            content_type: ``'plain'`` or ``'html'``.
            attachments: optional list of attachment file paths.

        Returns:
            Human-readable success message (Chinese, as the CLI expects).

        Raises:
            Exception: wrapping any underlying failure.
        """
        try:
            from email.mime.application import MIMEApplication
            import os

            message = MIMEMultipart()
            message['From'] = Header(self.email_address)
            message['To'] = Header(to_addr)
            message['Subject'] = Header(subject, 'utf-8')

            # Mail body.
            message.attach(MIMEText(content, content_type, 'utf-8'))

            # Attachments: RFC 2231 tuple encoding keeps non-ASCII names safe.
            if attachments:
                for file_path in attachments:
                    if not os.path.exists(file_path):
                        raise FileNotFoundError(f"附件文件不存在: {file_path}")
                    with open(file_path, 'rb') as f:
                        attachment = MIMEApplication(f.read())
                    attachment.add_header(
                        'Content-Disposition',
                        'attachment',
                        filename=('utf-8', '', os.path.basename(file_path))
                    )
                    message.attach(attachment)

            server = smtplib.SMTP_SSL(self.smtp_server, self.smtp_port)
            try:
                server.login(self.email_address, self.password)
                server.sendmail(self.email_address, [to_addr], message.as_string())
            finally:
                # Fixed: connection previously leaked when login/sendmail raised.
                server.quit()

            result = "邮件发送成功!"
            if attachments:
                result += f" (包含 {len(attachments)} 个附件)"
            return result

        except Exception as e:
            raise Exception(f"发送邮件失败: {str(e)}") from e

    def delete_email(self, email_id: str, mailbox: str = 'INBOX',
                     permanent: bool = False) -> str:
        """Delete one message via IMAP.

        By default the message is only flagged ``\\Deleted`` (the server
        typically moves it to a trash folder, recoverable for ~30 days);
        ``permanent=True`` additionally expunges it irreversibly.
        """
        try:
            mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port)
            try:
                mail.login(self.email_address, self.password)
                mail.select(mailbox)
                # Flag as deleted; only expunge makes it irreversible.
                mail.store(email_id.encode(), '+FLAGS', '\\Deleted')
                if permanent:
                    mail.expunge()
                    result = f"✓ 邮件 {email_id} 已彻底删除(不可恢复)"
                else:
                    result = f"✓ 邮件 {email_id} 已移到已删除文件夹(可在30天内从已删除文件夹恢复)"
                mail.close()
            finally:
                # Fixed: always log out, even when login/store raises.
                mail.logout()
            return result
        except Exception as e:
            raise Exception(f"删除邮件失败: {str(e)}") from e

    def delete_emails_batch(self, email_ids: List[str], mailbox: str = 'INBOX',
                            permanent: bool = False) -> str:
        """Batch variant of :meth:`delete_email` over one IMAP session."""
        try:
            mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port)
            try:
                mail.login(self.email_address, self.password)
                mail.select(mailbox)
                for eid in email_ids:
                    mail.store(eid.encode(), '+FLAGS', '\\Deleted')
                if permanent:
                    mail.expunge()
                    action = "彻底删除(不可恢复)"
                else:
                    action = "移到已删除文件夹(可在30天内恢复)"
                mail.close()
            finally:
                mail.logout()
            return f"✓ 已{action} {len(email_ids)} 封邮件"
        except Exception as e:
            raise Exception(f"批量删除邮件失败: {str(e)}") from e


class QQEmailManager(EmailManager):
    """QQ mailbox manager -- uses IMAP."""

    def __init__(self, email_address: str, auth_code: str):
        super().__init__(
            email_address=email_address,
            password=auth_code,
            imap_server='imap.qq.com',
            imap_port=993,
            smtp_server='smtp.qq.com',
            smtp_port=465
        )


class Email163Manager(EmailManager):
    """163 mailbox manager -- uses POP3 (163 IMAP has security restrictions)."""

    def __init__(self, email_address: str, auth_password: str):
        super().__init__(
            email_address=email_address,
            password=auth_password,
            imap_server='pop.163.com',  # POP3 host stored in the imap slot
            imap_port=995,
            smtp_server='smtp.163.com',
            smtp_port=465
        )
        self.pop_server = 'pop.163.com'
        self.pop_port = 995

    def _pop_connect(self) -> poplib.POP3_SSL:
        """Open and authenticate a POP3-SSL session (shared boilerplate)."""
        pop = poplib.POP3_SSL(self.pop_server, self.pop_port)
        pop.user(self.email_address)
        pop.pass_(self.password)
        return pop

    def receive_emails(self, mailbox: str = 'INBOX', limit: int = 10) -> List[Dict]:
        """Fetch the newest ``limit`` messages via POP3."""
        try:
            pop = self._pop_connect()
            try:
                num_messages = len(pop.list()[1])
                emails = []
                start = max(1, num_messages - limit + 1)
                for i in range(start, num_messages + 1):
                    try:
                        response, lines, octets = pop.retr(i)
                        msg = email.message_from_bytes(b'\r\n'.join(lines))
                        # decode_str / get_email_content live in the
                        # out-of-view head of EmailManager.
                        emails.append({
                            'id': str(i),
                            'subject': self.decode_str(msg.get('Subject', '')),
                            'from': self.decode_str(msg.get('From', '')),
                            'date': msg.get('Date', ''),
                            'content': self.get_email_content(msg),
                        })
                    except Exception:
                        # Skip messages that fail to parse.
                        continue
            finally:
                # Fixed: session previously leaked on mid-loop failure.
                pop.quit()
            return emails
        except Exception as e:
            raise Exception(f"接收邮件失败: {str(e)}") from e

    def receive_emails_since(self, since_date: datetime,
                             mailbox: str = 'INBOX') -> List[Dict]:
        """Fetch messages newer than ``since_date``; newest message last."""
        try:
            pop = self._pop_connect()
            try:
                num_messages = len(pop.list()[1])
                emails = []
                # Walk newest-first; assumes the mailbox is chronologically
                # ordered, so the first older message ends the scan.
                for i in range(num_messages, 0, -1):
                    try:
                        response, lines, octets = pop.retr(i)
                        msg = email.message_from_bytes(b'\r\n'.join(lines))
                        date_str = msg.get('Date', '')
                        try:
                            from email.utils import parsedate_to_datetime
                            email_date = parsedate_to_datetime(date_str)
                            if email_date < since_date:
                                break
                        except Exception:  # fixed: was a bare except
                            # Unparseable date -- skip the message.
                            continue
                        emails.append({
                            'id': str(i),
                            'subject': self.decode_str(msg.get('Subject', '')),
                            'from': self.decode_str(msg.get('From', '')),
                            'date': date_str,
                            'content': self.get_email_content(msg),
                        })
                    except Exception:
                        continue
            finally:
                pop.quit()
            # Reverse so the newest email comes last.
            return list(reversed(emails))
        except Exception as e:
            raise Exception(f"接收邮件失败: {str(e)}") from e

    def delete_email(self, email_id: str, mailbox: str = 'INBOX',
                     permanent: bool = False) -> str:
        """Delete one message via POP3.

        POP3 deletion is always permanent; ``permanent`` is accepted only
        for interface parity with the IMAP implementation.
        """
        try:
            pop = self._pop_connect()
            try:
                pop.dele(int(email_id))
            finally:
                pop.quit()
            return f"✓ 邮件 {email_id} 已永久删除(POP3协议不支持恢复,已从服务器移除)"
        except Exception as e:
            raise Exception(f"删除邮件失败: {str(e)}") from e

    def delete_emails_batch(self, email_ids: List[str], mailbox: str = 'INBOX',
                            permanent: bool = False) -> str:
        """Batch POP3 deletion -- always permanent (see :meth:`delete_email`)."""
        try:
            pop = self._pop_connect()
            try:
                for eid in email_ids:
                    pop.dele(int(eid))
            finally:
                pop.quit()
            return f"✓ 已永久删除 {len(email_ids)} 封邮件(POP3协议不支持恢复,已从服务器移除)"
        except Exception as e:
            raise Exception(f"批量删除邮件失败: {str(e)}") from e


def load_config(config_path: "str | None" = None) -> Dict:
    """Load ``config.json`` (next to this module unless a path is given)."""
    if config_path is None:
        config_path = os.path.join(os.path.dirname(__file__), 'config.json')

    if not os.path.exists(config_path):
        raise FileNotFoundError(
            f"配置文件不存在: {config_path}\n"
            f"请复制 config.json.example 为 config.json 并填写您的邮箱信息"
        )

    with open(config_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_config(config: Dict, config_path: "str | None" = None):
    """Persist ``config`` as pretty-printed UTF-8 JSON."""
    if config_path is None:
        config_path = os.path.join(os.path.dirname(__file__), 'config.json')

    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, ensure_ascii=False, indent=2)


def get_email_manager(email_type: str, config: Dict) -> EmailManager:
    """Build the provider-specific manager for ``email_type`` ('qq' or '163')."""
    if email_type == 'qq':
        email_config = config['qq_email']
        return QQEmailManager(
            email_address=email_config['email'],
            auth_code=email_config['auth_code']
        )
    elif email_type == '163':
        email_config = config['163_email']
        return Email163Manager(
            email_address=email_config['email'],
            auth_password=email_config['auth_password']
        )
    else:
        raise ValueError(f"不支持的邮箱类型: {email_type},请使用 'qq' 或 '163'")


# =============================================================================
# scripts/mail.py -- non-interactive mail management CLI.
# (Original file header: ``#!/usr/bin/env python3``; it imports ``load_config``
# and ``get_email_manager`` from ``email_manager`` -- here they are defined
# above because the mangled patch concatenates both files.)
# =============================================================================


def cmd_send(args):
    """``send`` subcommand: send a mail, optionally with attachments."""
    try:
        config = load_config()
        manager = get_email_manager(args.mailbox, config)

        attachments = args.attach if args.attach else None

        manager.send_email(
            to_addr=args.to,
            subject=args.subject,
            content=args.content,
            attachments=attachments
        )

        print(f"邮件已发送到 {args.to}")
        if attachments:
            print(f"  附件: {', '.join(attachments)}")

    except Exception as e:
        print(f"发送失败: {e}", file=sys.stderr)
        sys.exit(1)


def cmd_receive(args):
    """``receive`` subcommand: list the newest messages (text or JSON)."""
    try:
        config = load_config()
        manager = get_email_manager(args.mailbox, config)

        emails = manager.receive_emails(limit=args.limit)

        if not emails:
            if args.json:
                print(json.dumps({"count": 0, "emails": []}, ensure_ascii=False))
            else:
                print("收件箱为空")
            return

        if args.json:
            print(json.dumps({
                "count": len(emails),
                "emails": emails
            }, ensure_ascii=False, indent=2))
            return

        print(f"收到 {len(emails)} 封邮件:\n")
        for i, e in enumerate(emails, 1):
            print(f"[{i}] {e['subject']}")
            print(f"    发件人: {e['from']}")
            print(f"    日期: {e['date']}")
            print(f"    内容: {e['content'][:100]}...")
            print()

    except Exception as e:
        print(f"接收失败: {e}", file=sys.stderr)
        sys.exit(1)


def cmd_check_new(args):
    """``check-new`` subcommand: list messages from the last N days."""
    try:
        config = load_config()
        manager = get_email_manager(args.mailbox, config)

        since_date = datetime.now() - timedelta(days=args.since)
        new_emails = manager.receive_emails_since(since_date)

        if not new_emails:
            if args.json:
                print(json.dumps({"count": 0, "since_days": args.since, "emails": []}, ensure_ascii=False))
            else:
                print(f"没有新邮件(最近 {args.since} 天)")
            return

        if args.json:
            print(json.dumps({
                "count": len(new_emails),
                "since_days": args.since,
                "emails": [
                    {"subject": e['subject'], "from": e['from'], "date": e['date']}
                    for e in new_emails
                ]
            }, ensure_ascii=False, indent=2))
            return

        print(f"找到 {len(new_emails)} 封新邮件(最近 {args.since} 天):\n")
        for i, e in enumerate(new_emails, 1):
            print(f"[{i}] {e['subject']}")
            print(f"    发件人: {e['from']}")
            print(f"    日期: {e['date']}")
            print()

    except Exception as e:
        print(f"检查失败: {e}", file=sys.stderr)
        sys.exit(1)


def cmd_delete(args):
    """``delete`` subcommand: delete one or more messages by id."""
    try:
        config = load_config()
        manager = get_email_manager(args.mailbox, config)

        email_ids = args.ids

        if len(email_ids) == 1:
            result = manager.delete_email(email_ids[0], permanent=args.permanent)
        else:
            result = manager.delete_emails_batch(email_ids, permanent=args.permanent)

        print(result)

    except Exception as e:
        print(f"删除失败: {e}", file=sys.stderr)
        sys.exit(1)


def main():
    """CLI entry point: parse arguments and dispatch to a ``cmd_*`` handler."""
    # Probe the config for a default mailbox; fall back to 'qq' when the
    # config is missing or unreadable.
    try:
        config = load_config()
        default_mailbox = config.get('default_mailbox', 'qq')
    except Exception:  # fixed: was a bare except
        default_mailbox = 'qq'

    parser = argparse.ArgumentParser(
        description='邮件管理工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 发送邮件(使用默认邮箱)
  %(prog)s send --to user@example.com --subject "Hello" --content "Test"

  # 发送带附件
  %(prog)s send --to user@example.com --subject "Report" --content "See file" --attach report.pdf

  # 接收最新 5 封邮件
  %(prog)s receive --limit 5

  # 检查最近 2 天的新邮件
  %(prog)s check-new --since 2

  # 删除邮件(移到已删除文件夹,可在30天内恢复)
  %(prog)s delete --ids 123

  # 批量删除(移到已删除文件夹)
  %(prog)s delete --ids 123 124 125

  # 彻底删除(不可恢复,立即从服务器移除)
  %(prog)s delete --ids 123 --permanent

  # 批量彻底删除
  %(prog)s delete --ids 123 124 125 --permanent

  # 使用 163 邮箱
  %(prog)s send --mailbox 163 --to user@example.com --subject "Test"
"""
    )

    parser.add_argument(
        '--mailbox',
        choices=['qq', '163'],
        default=default_mailbox,
        help=f'邮箱类型 (默认: {default_mailbox},可在 config.json 中修改 default_mailbox)'
    )

    subparsers = parser.add_subparsers(dest='command', help='命令')

    # send
    send_parser = subparsers.add_parser('send', help='发送邮件')
    send_parser.add_argument('--to', required=True, help='收件人邮箱')
    send_parser.add_argument('--subject', required=True, help='邮件主题')
    send_parser.add_argument('--content', required=True, help='邮件内容')
    send_parser.add_argument('--attach', nargs='+', help='附件文件路径')

    # receive
    receive_parser = subparsers.add_parser('receive', help='接收邮件')
    receive_parser.add_argument('--limit', type=int, default=10, help='接收数量 (默认: 10)')
    receive_parser.add_argument('--json', action='store_true', help='JSON 格式输出')

    # check-new
    check_parser = subparsers.add_parser('check-new', help='检查新邮件')
    check_parser.add_argument('--since', type=int, default=1, help='检查最近 N 天 (默认: 1)')
    check_parser.add_argument('--json', action='store_true', help='JSON 格式输出')

    # delete
    delete_parser = subparsers.add_parser('delete', help='删除邮件')
    delete_parser.add_argument('--ids', nargs='+', required=True,
                               help='要删除的邮件 ID(可指定多个,用空格分隔)')
    delete_parser.add_argument('--permanent', action='store_true',
                               help='彻底删除(不可恢复)。不指定此参数时,邮件将移到已删除文件夹,可在30天内恢复。注意:163邮箱使用POP3协议,删除始终是永久的')

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    # Dispatch table replaces the original if/elif chain.
    handlers = {
        'send': cmd_send,
        'receive': cmd_receive,
        'check-new': cmd_check_new,
        'delete': cmd_delete,
    }
    handlers[args.command](args)


if __name__ == '__main__':
    main()


# --- scripts/requirements.txt (from the same patch) --------------------------
# 邮箱管理所需的 Python 包
# 注意: imaplib, smtplib, email 是 Python 标准库,无需安装
# --- LICENSE.txt head (the license text continues past this chunk) -----------
# © 2026 元景, PBC.
保留所有权利。 + +额外限制:尽管协议中有任何相反规定,用户不得: + +- 从服务中提取这些资料或在服务之外保留这些资料的副本 +- 复制或拷贝这些资料,但在授权使用服务期间自动创建的临时副本除外 +- 基于这些资料创作衍生作品 +- 向任何第三方分发、再许可或转让这些资料 +- 制造、许诺销售、销售或进口这些资料中体现的任何发明 +- 对这些资料进行逆向工程、反编译或反汇编 + +接收、查看或持有这些资料并不授予或暗示任何超出上述明确授予的许可或权利。 + +元景保留这些资料的所有权利、所有权和利益,包括所有版权、专利和其他知识产权。 diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/editing.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/editing.md new file mode 100644 index 00000000..66dcf7e2 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/editing.md @@ -0,0 +1,205 @@ +# 编辑演示文稿 + +## 基于模板的工作流 + +使用现有演示文稿作为模板时: + +1. **分析现有幻灯片**: + ```bash + python scripts/thumbnail.py template.pptx + python -m markitdown template.pptx + ``` + 查看 `thumbnails.jpg` 了解布局,查看 markitdown 输出了解占位符文本。 + +2. **规划幻灯片映射**:为每个内容部分选择一个模板幻灯片。 + + ⚠️ **使用多样化的布局** —— 布局单调是常见的失败模式。不要默认使用基本的标题 + 列表幻灯片。主动寻找: + - 多栏布局(双栏、三栏) + - 图片 + 文字组合 + - 全出血图片配文字覆盖 + - 引用或强调幻灯片 + - 章节分隔页 + - 数据/数字突出显示 + - 图标网格或图标 + 文字行 + + **避免:** 每张幻灯片重复使用相同的文字密集型布局。 + + 将内容类型与布局风格匹配(如:要点 → 列表幻灯片,团队信息 → 多栏,证言 → 引用幻灯片)。 + +3. **解包**:`python scripts/office/unpack.py template.pptx unpacked/` + +4. **构建演示文稿**(自己完成,不要使用子代理): + - 删除不需要的幻灯片(从 `` 中移除) + - 复制要重用的幻灯片(`add_slide.py`) + - 在 `` 中重新排序幻灯片 + - **在步骤 5 之前完成所有结构性更改** + +5. **编辑内容**:更新每个 `slide{N}.xml` 中的文本。 + **如果可用,在此处使用子代理** —— 幻灯片是独立的 XML 文件,所以子代理可以并行编辑。 + +6. **清理**:`python scripts/clean.py unpacked/` + +7. 
**打包**:`python scripts/office/pack.py unpacked/ output.pptx --original template.pptx` + +--- + +## 脚本 + +| 脚本 | 用途 | +|------|------| +| `unpack.py` | 解压并格式化 PPTX | +| `add_slide.py` | 复制幻灯片或从布局创建 | +| `clean.py` | 删除孤立文件 | +| `pack.py` | 验证后重新打包 | +| `thumbnail.py` | 创建幻灯片可视化网格 | + +### unpack.py + +```bash +python scripts/office/unpack.py input.pptx unpacked/ +``` + +解压 PPTX,格式化 XML,转义智能引号。 + +### add_slide.py + +```bash +python scripts/add_slide.py unpacked/ slide2.xml # 复制幻灯片 +python scripts/add_slide.py unpacked/ slideLayout2.xml # 从布局创建 +``` + +打印要添加到 `` 中所需位置的 ``。 + +### clean.py + +```bash +python scripts/clean.py unpacked/ +``` + +删除不在 `` 中的幻灯片、未引用的媒体、孤立的关系文件。 + +### pack.py + +```bash +python scripts/office/pack.py unpacked/ output.pptx --original input.pptx +``` + +验证、修复、压缩 XML、重新编码智能引号。 + +### thumbnail.py + +```bash +python scripts/thumbnail.py input.pptx [output_prefix] [--cols N] +``` + +创建 `thumbnails.jpg`,以幻灯片文件名作为标签。默认 3 列,每网格最多 12 张。 + +**仅用于模板分析**(选择布局)。对于可视化 QA,使用 `soffice` + `pdftoppm` 创建全分辨率单张幻灯片图片 —— 见 SKILL.md。 + +--- + +## 幻灯片操作 + +幻灯片顺序在 `ppt/presentation.xml` → `` 中。 + +**重新排序**:重新排列 `` 元素。 + +**删除**:移除 ``,然后运行 `clean.py`。 + +**添加**:使用 `add_slide.py`。永远不要手动复制幻灯片文件 —— 脚本会处理手动复制会遗漏的备注引用、Content_Types.xml 和关系 ID。 + +--- + +## 编辑内容 + +**子代理:** 如果可用,在此处使用(完成步骤 4 后)。每张幻灯片是独立的 XML 文件,所以子代理可以并行编辑。在给子代理的提示中包含: +- 要编辑的幻灯片文件路径 +- **"所有更改使用 Edit 工具"** +- 下面的格式规则和常见陷阱 + +对于每张幻灯片: +1. 读取幻灯片的 XML +2. 识别所有占位符内容 —— 文本、图片、图表、图标、说明文字 +3. 
用最终内容替换每个占位符 + +**使用 Edit 工具,而不是 sed 或 Python 脚本。** Edit 工具强制明确要替换什么和在哪里替换,从而提供更好的可靠性。 + +### 格式规则 + +- **所有标题、副标题和行内标签加粗**:在 `` 上使用 `b="1"`。包括: + - 幻灯片标题 + - 幻灯片内的章节标题 + - 行首的行内标签(如:"状态:"、"描述:") +- **永远不要使用 unicode 项目符号(•)**:使用正确的列表格式 `` 或 `` +- **项目符号一致性**:让项目符号从布局继承。只指定 `` 或 ``。 + +--- + +## 常见陷阱 + +### 模板适配 + +当源内容项目少于模板时: +- **完全删除多余元素**(图片、形状、文本框),不要只清除文本 +- 清除文本内容后检查孤立的视觉元素 +- 进行可视化 QA 以发现数量不匹配 + +用不同长度的内容替换文本时: +- **更短的替换**:通常安全 +- **更长的替换**:可能溢出或意外换行 +- 文本更改后用可视化 QA 测试 +- 考虑截断或拆分内容以适应模板的设计约束 + +**模板槽位 ≠ 源项目**:如果模板有 4 个团队成员但源有 3 个用户,删除第 4 个成员的整个组(图片 + 文本框),而不只是文本。 + +### 多项内容 + +如果源有多项内容(编号列表、多个部分),为每项创建单独的 `` 元素 —— **永远不要连接成一个字符串**。 + +**❌ 错误** —— 所有项目在一个段落中: +```xml + + 步骤 1:做第一件事。步骤 2:做第二件事。 + +``` + +**✅ 正确** —— 分开的段落配粗体标题: +```xml + + + 步骤 1 + + + + 做第一件事。 + + + + 步骤 2 + + +``` + +从原始段落复制 `` 以保留行间距。在标题上使用 `b="1"`。 + +### 智能引号 + +由 unpack/pack 自动处理。但 Edit 工具会将智能引号转换为 ASCII。 + +**添加带引号的新文本时,使用 XML 实体:** + +```xml +the “Agreement” +``` + +| 字符 | 名称 | Unicode | XML 实体 | +|------|------|---------|----------| +| `"` | 左双引号 | U+201C | `“` | +| `"` | 右双引号 | U+201D | `”` | +| `'` | 左单引号 | U+2018 | `‘` | +| `'` | 右单引号 | U+2019 | `’` | + +### 其他 + +- **空白**:在有前导/尾随空格的 `` 上使用 `xml:space="preserve"` +- **XML 解析**:使用 `defusedxml.minidom`,而非 `xml.etree.ElementTree`(会破坏命名空间) diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/examin.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/examin.md new file mode 100644 index 00000000..b3ed0249 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/examin.md @@ -0,0 +1,117 @@ +# PPTX Plus Examination & QA (Linux) + +## 可视化工具(PPT 转图片) + +将 PPTX 幻灯片转换为图片以便进行可视化检查和 AI 审核。 + +```bash +# 将目录中所有 PPTX 文件转换为图片 +python scripts/ppt_to_pic.py --ppt-dir ./ppt --output-dir ./images + +# 将单个 PPTX 文件转换为图片 +python scripts/ppt_to_pic.py --file presentation.pptx --output ./images + +# 输出:为每个 PPTX 的幻灯片创建 JPG 图片 +``` + +### Qwen Vision 图片描述 + +使用 Qwen Vision 分析和描述图片: + +```bash +# 描述单张图片 +python 
scripts/vision_qwen.py --image path/to/image.png --prompt "描述这张幻灯片" + +# 批量描述多张图片(每批最多 5 张) +python scripts/vision_qwen.py --images img1.png img2.png img3.png +``` + +**使用场景:** +- 分析生成的幻灯片进行可视化 QA +- 描述参考图片获取设计灵感 +- 从截图中提取文本和布局信息 + +--- + +## QA(必需) + +**假设存在问题。你的任务是找出它们。** + +你的第一次渲染几乎从来不是正确的。将 QA 视为 bug 狩猎,而非确认步骤。如果在第一次检查时没有发现问题,说明你检查得不够仔细。 + +### 内容 QA + +```bash +python -m markitdown output.pptx +``` + +检查缺失内容、错别字、错误顺序。 + +**使用模板时,检查残留的占位符文本:** + +```bash +python -m markitdown output.pptx | grep -iE "xxxx|lorem|ipsum|this.*(page|slide).*layout" +``` + +如果 grep 返回结果,在声明成功前修复它们。 + +### 可视化 QA + +**⚠️ 使用子代理** —— 即使只有 2-3 张幻灯片。你一直在盯着代码,会看到你期望看到的内容,而不是实际存在的内容。子代理有全新的视角。 + +将幻灯片转换为图片(见[转换为图片](#转换为图片)),然后使用此提示: + +``` +可视化检查这些幻灯片。假设存在问题 —— 找出它们。 + +查找: +- 重叠元素(文字穿过形状、线条穿过文字、堆叠元素) +- 文字溢出或在边缘/框边界处被截断 +- 为单行文本定位的装饰线,但标题换行成了两行 +- 来源引用或页脚与上方内容冲突 +- 元素过近(< 0.3" 间隙)或卡片/区块几乎接触 +- 间隙不均匀(一处大面积空白,另一处拥挤) +- 距幻灯片边缘边距不足(< 0.5") +- 列或类似元素未一致对齐 +- 低对比度文字(如奶油色背景上的浅灰色文字) +- 低对比度图标(如深色背景上的深色图标,没有对比色圆圈) +- 文本框过窄导致过度换行 +- 残留的占位符内容 + +对于每张幻灯片,列出问题或关注点,即使是次要的。 + +读取并分析这些图片: +1. /path/to/slide-01.jpg(预期:[简要描述]) +2. /path/to/slide-02.jpg(预期:[简要描述]) + +报告发现的所有问题,包括次要问题。 +``` + +### 验证循环 + +1. 生成幻灯片 → 转换为图片 → 检查 +2. **列出发现的问题**(如果没有发现问题,更批判性地再次查看) +3. 修复问题 +4. **重新验证受影响的幻灯片** —— 一个修复经常会引发另一个问题 +5. 
重复直到完整检查没有发现新问题 + +**在完成至少一次修复-验证循环之前,不要声明成功。** + +--- + +## 转换为图片 + +将演示文稿转换为单独的幻灯片图片以便进行可视化检查: + +```bash +python scripts/office/soffice.py --headless --convert-to pdf output.pptx +pdftoppm -jpeg -r 150 output.pdf slide +``` + +这将创建 `slide-01.jpg`、`slide-02.jpg` 等。 + +修复后重新渲染特定幻灯片: + +```bash +pdftoppm -jpeg -r 150 -f N -l N output.pdf slide-fixed +``` diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/pptxgenjs.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/pptxgenjs.md new file mode 100644 index 00000000..375607e4 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/pptxgenjs.md @@ -0,0 +1,422 @@ +# PptxGenJS 教程 + +## 设置与基本结构 + +```javascript +const pptxgen = require("pptxgenjs"); + +let pres = new pptxgen(); +pres.layout = 'LAYOUT_16x9'; // 或 'LAYOUT_16x10', 'LAYOUT_4x3', 'LAYOUT_WIDE' +pres.author = 'Your Name'; +pres.title = 'Presentation Title'; + +let slide = pres.addSlide(); +slide.addText("Hello World!", { x: 0.5, y: 0.5, fontSize: 36, color: "363636" }); + +pres.writeFile({ fileName: "Presentation.pptx" }); +``` + +## 布局尺寸 + +幻灯片尺寸(坐标单位为英寸): +- `LAYOUT_16x9`:10" × 5.625"(默认) +- `LAYOUT_16x10`:10" × 6.25" +- `LAYOUT_4x3`:10" × 7.5" +- `LAYOUT_WIDE`:13.3" × 7.5" + +--- + +## 文本与格式 + +```javascript +// 基本文本 +slide.addText("Simple Text", { + x: 1, y: 1, w: 8, h: 2, fontSize: 24, fontFace: "Arial", + color: "363636", bold: true, align: "center", valign: "middle" +}); + +// 字符间距(使用 charSpacing,不是 letterSpacing,后者会被静默忽略) +slide.addText("SPACED TEXT", { x: 1, y: 1, w: 8, h: 1, charSpacing: 6 }); + +// 富文本数组 +slide.addText([ + { text: "Bold ", options: { bold: true } }, + { text: "Italic ", options: { italic: true } } +], { x: 1, y: 3, w: 8, h: 1 }); + +// 多行文本(需要 breakLine: true) +slide.addText([ + { text: "Line 1", options: { breakLine: true } }, + { text: "Line 2", options: { breakLine: true } }, + { text: "Line 3" } // 最后一项不需要 breakLine +], { x: 0.5, y: 0.5, w: 8, h: 2 }); + +// 文本框边距(内边距) +slide.addText("Title", { + x: 
0.5, y: 0.3, w: 9, h: 0.6, + margin: 0 // 当需要文本与其他元素(如形状或图标)精确对齐时使用 0 +}); +``` + +**提示:** 文本框默认有内边距。当需要文本与相同 x 位置的形状、线条或图标精确对齐时,设置 `margin: 0`。 + +--- + +## 列表与项目符号 + +```javascript +// ✅ 正确:多个项目符号 +slide.addText([ + { text: "First item", options: { bullet: true, breakLine: true } }, + { text: "Second item", options: { bullet: true, breakLine: true } }, + { text: "Third item", options: { bullet: true } } +], { x: 0.5, y: 0.5, w: 8, h: 3 }); + +// ❌ 错误:永远不要使用 unicode 项目符号 +slide.addText("• First item", { ... }); // 会创建双重项目符号 + +// 子项和编号列表 +{ text: "Sub-item", options: { bullet: true, indentLevel: 1 } } +{ text: "First", options: { bullet: { type: "number" }, breakLine: true } } +``` + +--- + +## 形状 + +```javascript +slide.addShape(pres.shapes.RECTANGLE, { + x: 0.5, y: 0.8, w: 1.5, h: 3.0, + fill: { color: "FF0000" }, line: { color: "000000", width: 2 } +}); + +slide.addShape(pres.shapes.OVAL, { x: 4, y: 1, w: 2, h: 2, fill: { color: "0000FF" } }); + +slide.addShape(pres.shapes.LINE, { + x: 1, y: 3, w: 5, h: 0, line: { color: "FF0000", width: 3, dashType: "dash" } +}); + +// 带透明度 +slide.addShape(pres.shapes.RECTANGLE, { + x: 1, y: 1, w: 3, h: 2, + fill: { color: "0088CC", transparency: 50 } +}); + +// 圆角矩形(rectRadius 仅适用于 ROUNDED_RECTANGLE,不适用于 RECTANGLE) +// ⚠️ 不要与矩形强调叠加层配对 —— 它们无法覆盖圆角。改用 RECTANGLE。 +slide.addShape(pres.shapes.ROUNDED_RECTANGLE, { + x: 1, y: 1, w: 3, h: 2, + fill: { color: "FFFFFF" }, rectRadius: 0.1 +}); + +// 带阴影 +slide.addShape(pres.shapes.RECTANGLE, { + x: 1, y: 1, w: 3, h: 2, + fill: { color: "FFFFFF" }, + shadow: { type: "outer", color: "000000", blur: 6, offset: 2, angle: 135, opacity: 0.15 } +}); +``` + +阴影选项: + +| 属性 | 类型 | 范围 | 说明 | +|------|------|------|------| +| `type` | string | `"outer"`, `"inner"` | | +| `color` | string | 6 字符十六进制(如 `"000000"`) | 无 `#` 前缀,无 8 字符十六进制 —— 见常见陷阱 | +| `blur` | number | 0-100 pt | | +| `offset` | number | 0-200 pt | **必须非负** —— 负值会损坏文件 | +| `angle` | number | 0-359 度 | 阴影投射方向(135 = 右下,270 = 向上) | +| 
`opacity` | number | 0.0-1.0 | 用此控制透明度,永远不要编码到颜色字符串中 | + +要向上投射阴影(如在页脚栏上),使用 `angle: 270` 配正偏移 —— **不要**使用负偏移。 + +**注意**:渐变填充不原生支持。改用渐变图片作为背景。 + +--- + +## 图片 + +### 图片来源 + +```javascript +// 从文件路径 +slide.addImage({ path: "images/chart.png", x: 1, y: 1, w: 5, h: 3 }); + +// 从 URL +slide.addImage({ path: "https://example.com/image.jpg", x: 1, y: 1, w: 5, h: 3 }); + +// 从 base64(更快,无文件 I/O) +slide.addImage({ data: "image/png;base64,iVBORw0KGgo...", x: 1, y: 1, w: 5, h: 3 }); +``` + +### 图片选项 + +```javascript +slide.addImage({ + path: "image.png", + x: 1, y: 1, w: 5, h: 3, + rotate: 45, // 0-359 度 + rounding: true, // 圆形裁剪 + transparency: 50, // 0-100 + flipH: true, // 水平翻转 + flipV: false, // 垂直翻转 + altText: "Description", // 无障碍访问 + hyperlink: { url: "https://example.com" } +}); +``` + +### 图片尺寸模式 + +```javascript +// Contain - 适应内部,保持比例 +{ sizing: { type: 'contain', w: 4, h: 3 } } + +// Cover - 填充区域,保持比例(可能裁剪) +{ sizing: { type: 'cover', w: 4, h: 3 } } + +// Crop - 裁剪特定部分 +{ sizing: { type: 'crop', x: 0.5, y: 0.5, w: 2, h: 2 } } +``` + +### 计算尺寸(保持宽高比) + +```javascript +const origWidth = 1978, origHeight = 923, maxHeight = 3.0; +const calcWidth = maxHeight * (origWidth / origHeight); +const centerX = (10 - calcWidth) / 2; + +slide.addImage({ path: "image.png", x: centerX, y: 1.2, w: calcWidth, h: maxHeight }); +``` + +### 支持的格式 + +- **标准格式**:PNG、JPG、GIF(动态 GIF 在 Microsoft 365 中可用) +- **SVG**:在现代 PowerPoint/Microsoft 365 中可用 + +--- + +## 图标 + +使用 react-icons 生成 SVG 图标,然后转换为 PNG 以实现通用兼容性。 + +### 设置 + +```javascript +const React = require("react"); +const ReactDOMServer = require("react-dom/server"); +const sharp = require("sharp"); +const { FaCheckCircle, FaChartLine } = require("react-icons/fa"); + +function renderIconSvg(IconComponent, color = "#000000", size = 256) { + return ReactDOMServer.renderToStaticMarkup( + React.createElement(IconComponent, { color, size: String(size) }) + ); +} + +async function iconToBase64Png(IconComponent, color, size = 256) { + const 
svg = renderIconSvg(IconComponent, color, size); + const pngBuffer = await sharp(Buffer.from(svg)).png().toBuffer(); + return "image/png;base64," + pngBuffer.toString("base64"); +} +``` + +### 添加图标到幻灯片 + +```javascript +const iconData = await iconToBase64Png(FaCheckCircle, "#4472C4", 256); + +slide.addImage({ + data: iconData, + x: 1, y: 1, w: 0.5, h: 0.5 // 尺寸单位为英寸 +}); +``` + +**注意**:使用 256 或更高的尺寸以获得清晰的图标。size 参数控制栅格化分辨率,而不是幻灯片上的显示尺寸(由 `w` 和 `h` 以英寸为单位设置)。 + +### 图标库 + +安装:`npm install -g react-icons react react-dom sharp` + +react-icons 中流行的图标集: +- `react-icons/fa` - Font Awesome +- `react-icons/md` - Material Design +- `react-icons/hi` - Heroicons +- `react-icons/bi` - Bootstrap Icons + +--- + +## 幻灯片背景 + +```javascript +// 纯色 +slide.background = { color: "F1F1F1" }; + +// 带透明度的颜色 +slide.background = { color: "FF3399", transparency: 50 }; + +// 从 URL 加载图片 +slide.background = { path: "https://example.com/bg.jpg" }; + +// 从 base64 加载图片 +slide.background = { data: "image/png;base64,iVBORw0KGgo..." 
}; +``` + +--- + +## 表格 + +```javascript +slide.addTable([ + ["Header 1", "Header 2"], + ["Cell 1", "Cell 2"] +], { + x: 1, y: 1, w: 8, h: 2, + border: { pt: 1, color: "999999" }, fill: { color: "F1F1F1" } +}); + +// 高级用法:合并单元格 +let tableData = [ + [{ text: "Header", options: { fill: { color: "6699CC" }, color: "FFFFFF", bold: true } }, "Cell"], + [{ text: "Merged", options: { colspan: 2 } }] +]; +slide.addTable(tableData, { x: 1, y: 3.5, w: 8, colW: [4, 4] }); +``` + +--- + +## 图表 + +```javascript +// 柱状图 +slide.addChart(pres.charts.BAR, [{ + name: "Sales", labels: ["Q1", "Q2", "Q3", "Q4"], values: [4500, 5500, 6200, 7100] +}], { + x: 0.5, y: 0.6, w: 6, h: 3, barDir: 'col', + showTitle: true, title: 'Quarterly Sales' +}); + +// 折线图 +slide.addChart(pres.charts.LINE, [{ + name: "Temp", labels: ["Jan", "Feb", "Mar"], values: [32, 35, 42] +}], { x: 0.5, y: 4, w: 6, h: 3, lineSize: 3, lineSmooth: true }); + +// 饼图 +slide.addChart(pres.charts.PIE, [{ + name: "Share", labels: ["A", "B", "Other"], values: [35, 45, 20] +}], { x: 7, y: 1, w: 5, h: 4, showPercent: true }); +``` + +### 更美观的图表 + +默认图表看起来过时。应用以下选项以获得现代、简洁的外观: + +```javascript +slide.addChart(pres.charts.BAR, chartData, { + x: 0.5, y: 1, w: 9, h: 4, barDir: "col", + + // 自定义颜色(匹配你的演示配色方案) + chartColors: ["0D9488", "14B8A6", "5EEAD4"], + + // 简洁背景 + chartArea: { fill: { color: "FFFFFF" }, roundedCorners: true }, + + // 柔和的轴标签 + catAxisLabelColor: "64748B", + valAxisLabelColor: "64748B", + + // 细微网格(仅值轴) + valGridLine: { color: "E2E8F0", size: 0.5 }, + catGridLine: { style: "none" }, + + // 柱上的数据标签 + showValue: true, + dataLabelPosition: "outEnd", + dataLabelColor: "1E293B", + + // 单系列时隐藏图例 + showLegend: false, +}); +``` + +**关键样式选项:** +- `chartColors: [...]` - 系列/片段的十六进制颜色 +- `chartArea: { fill, border, roundedCorners }` - 图表背景 +- `catGridLine/valGridLine: { color, style, size }` - 网格线(`style: "none"` 隐藏) +- `lineSmooth: true` - 曲线(折线图) +- `legendPos: "r"` - 图例位置:"b", "t", "l", "r", "tr" + +--- + +## 幻灯片母版 + 
+```javascript +pres.defineSlideMaster({ + title: 'TITLE_SLIDE', background: { color: '283A5E' }, + objects: [{ + placeholder: { options: { name: 'title', type: 'title', x: 1, y: 2, w: 8, h: 2 } } + }] +}); + +let titleSlide = pres.addSlide({ masterName: "TITLE_SLIDE" }); +titleSlide.addText("My Title", { placeholder: "title" }); +``` + +--- + +## 常见陷阱 + +⚠️ 这些问题会导致文件损坏、视觉错误或输出中断。请避免。 + +1. **永远不要在十六进制颜色中使用 "#"** - 会导致文件损坏 + ```javascript + color: "FF0000" // ✅ 正确 + color: "#FF0000" // ❌ 错误 + ``` + +2. **永远不要在十六进制颜色字符串中编码透明度** - 8 字符颜色(如 `"00000020"`)会损坏文件。改用 `opacity` 属性。 + ```javascript + shadow: { type: "outer", blur: 6, offset: 2, color: "00000020" } // ❌ 损坏文件 + shadow: { type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.12 } // ✅ 正确 + ``` + +3. **使用 `bullet: true`** - 永远不要使用 unicode 符号如 "•"(会创建双重项目符号) + +4. **数组项之间使用 `breakLine: true`** 否则文本会连在一起 + +5. **项目符号避免使用 `lineSpacing`** - 会导致过大间隙;改用 `paraSpaceAfter` + +6. **每个演示文稿需要新实例** - 不要重用 `pptxgen()` 对象 + +7. **永远不要跨调用重用选项对象** - PptxGenJS 会原地修改对象(如将阴影值转换为 EMU)。在多次调用间共享一个对象会损坏第二个形状。 + ```javascript + const shadow = { type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.15 }; + slide.addShape(pres.shapes.RECTANGLE, { shadow, ... }); // ❌ 第二次调用会得到已转换的值 + slide.addShape(pres.shapes.RECTANGLE, { shadow, ... }); + + const makeShadow = () => ({ type: "outer", blur: 6, offset: 2, color: "000000", opacity: 0.15 }); + slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... }); // ✅ 每次创建新对象 + slide.addShape(pres.shapes.RECTANGLE, { shadow: makeShadow(), ... }); + ``` + +8. 
**不要将 `ROUNDED_RECTANGLE` 与强调边框一起使用** - 矩形叠加条无法覆盖圆角。改用 `RECTANGLE`。 + ```javascript + // ❌ 错误:强调条无法覆盖圆角 + slide.addShape(pres.shapes.ROUNDED_RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: "FFFFFF" } }); + slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: "0891B2" } }); + + // ✅ 正确:使用 RECTANGLE 实现整洁对齐 + slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 3, h: 1.5, fill: { color: "FFFFFF" } }); + slide.addShape(pres.shapes.RECTANGLE, { x: 1, y: 1, w: 0.08, h: 1.5, fill: { color: "0891B2" } }); + ``` + +9. **永远不要使用中文引号(如:" ")** - 在 JavaScript 字符串中,使用标准 ASCII 引号(`' '` 或 `" "`)。中文引号会导致 PptxGenJS 崩溃或生成损坏文件。 + +--- + +## 快速参考 + +- **形状**:RECTANGLE, OVAL, LINE, ROUNDED_RECTANGLE +- **图表**:BAR, LINE, PIE, DOUGHNUT, SCATTER, BUBBLE, RADAR +- **布局**:LAYOUT_16x9 (10"×5.625"), LAYOUT_16x10, LAYOUT_4x3, LAYOUT_WIDE +- **对齐**:"left", "center", "right" +- **图表数据标签**:"outEnd", "inEnd", "center" diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_area_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_area_chart.md new file mode 100644 index 00000000..0845a814 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_area_chart.md @@ -0,0 +1,27 @@ +# generate_area_chart — 面积图 + +## 功能概述 +展示连续自变量(常为时间)下的数值趋势,可启用堆叠观察不同分组的累计贡献,适合 KPI、能源、产出等时间序列场景。 + +## 输入字段 +### 必填 +- `data`: 数组,元素包含 `time`(string)与 `value`(number),堆叠时需补充 `group`(string),至少 1 条记录。 + +### 可选 +- `stack`: boolean,默认 `false`,开启堆叠需确保每条数据都含 `group` 字段。 +- `style.backgroundColor`: string,设置图表背景色(如 `#fff`)。 +- `style.lineWidth`: number,自定义面积边界的线宽。 +- `style.palette`: string[],传入调色板数组用于系列着色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough` 以控制手绘质感。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`,控制图表宽度。 +- `height`: number,默认 `400`,控制图表高度。 +- `title`: string,默认空字符串,用于设置图表标题。 +- `axisXTitle`: string,默认空字符串,用于设置 X 轴标题。 +- `axisYTitle`: 
string,默认空字符串,用于设置 Y 轴标题。 + +## 使用建议 +保证 `time` 字段格式统一(如 `YYYY-MM`);堆叠模式下各组数据需覆盖相同的时间点,可先做缺失补值。 + +## 返回结果 +- 返回图像 URL,并在 `_meta.spec` 中附带完整面积图配置,可供二次渲染或追踪。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_bar_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_bar_chart.md new file mode 100644 index 00000000..5044d535 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_bar_chart.md @@ -0,0 +1,27 @@ +# generate_bar_chart — 条形图 + +## 功能概述 +以横向条形比较不同类别或分组的指标表现,适合 Top-N 排行、不同地区或渠道对比。 + +## 输入字段 +### 必填 +- `data`: array,每条至少含 `category`(string)与 `value`(number),如需分组或堆叠需额外提供 `group`(string)。 + +### 可选 +- `group`: boolean,默认 `false`,启用后以并排形式展示不同 `group`,并要求 `stack=false` 且数据含 `group` 字段。 +- `stack`: boolean,默认 `true`,启用后将不同 `group` 堆叠在同一条形上,并要求 `group=false` 且数据含 `group` 字段。 +- `style.backgroundColor`: string,自定义背景色(如 `#fff`)。 +- `style.palette`: string[],设置系列颜色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`,控制图表宽度。 +- `height`: number,默认 `400`,控制图表高度。 +- `title`: string,默认空字符串,用于设置图表标题。 +- `axisXTitle`: string,默认空字符串,设置 X 轴标题。 +- `axisYTitle`: string,默认空字符串,设置 Y 轴标题。 + +## 使用建议 +类别名称保持简短;若系列数较多可改用堆叠或筛选重点项目,以免图表拥挤。 + +## 返回结果 +- 返回条形图图像 URL,并在 `_meta.spec` 中给出完整配置以便复用。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_boxplot_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_boxplot_chart.md new file mode 100644 index 00000000..d2259a24 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_boxplot_chart.md @@ -0,0 +1,25 @@ +# generate_boxplot_chart — 箱型图 + +## 功能概述 +展示各类别数据的分布范围(最值、四分位、异常值),用于质量监控、实验结果或群体分布比较。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `category`(string)与 `value`(number),可选 
`group`(string)用于多组比较。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 使用建议 +单个类别至少提供 5 个样本以保证统计意义;如需展示多批次,可通过 `group` 或拆分多次调用。 + +## 返回结果 +- 返回箱型图 URL,并在 `_meta.spec` 中储存输入规格。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_column_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_column_chart.md new file mode 100644 index 00000000..d21ba401 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_column_chart.md @@ -0,0 +1,27 @@ +# generate_column_chart — 柱状图 + +## 功能概述 +纵向柱状对比不同类别或时间段的指标,可分组或堆叠展示,常用于销量、营收、客流对比。 + +## 输入字段 +### 必填 +- `data`: array,每条至少含 `category`(string)与 `value`(number),如需分组或堆叠需补充 `group`(string)。 + +### 可选 +- `group`: boolean,默认 `true`,用于按系列并排展示不同 `group`,开启时需确保 `stack=false` 且数据包含 `group`。 +- `stack`: boolean,默认 `false`,用于将不同 `group` 堆叠到同一柱子,开启时需确保 `group=false` 且数据包含 `group`。 +- `style.backgroundColor`: string,自定义背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 使用建议 +当类别较多(>12)时可按 Top-N 或聚合;堆叠模式要确保各记录都含 `group` 字段以免校验失败。 + +## 返回结果 +- 返回柱状图 URL,并随 `_meta.spec` 提供配置详情。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_district_map.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_district_map.md new file mode 100644 index 00000000..9ce13ccc --- 
/dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_district_map.md @@ -0,0 +1,28 @@ +# generate_district_map — 行政区地图(中国) + +## 功能概述 +生成中国境内省/市/区/县的覆盖或热力图,可展示指标区间、类别或区域组成,适用于区域销售、政策覆盖等场景。 + +## 输入字段 +### 必填 +- `title`: string,必填且≤16 字,描述地图主题。 +- `data`: object,必填,承载行政区配置及指标信息。 +- `data.name`: string,必填,中国境内的行政区关键词,需明确到省/市/区/县。 + +### 可选 +- `data.style.fillColor`: string,自定义无数据区域的填充色。 +- `data.colors`: string[],枚举或连续色带,默认提供 10 色列表。 +- `data.dataType`: string,枚举 `number`/`enum`,决定颜色映射方式。 +- `data.dataLabel`: string,指标名称(如 `GDP`)。 +- `data.dataValue`: string,指标值或枚举标签。 +- `data.dataValueUnit`: string,指标单位(如 `万亿`)。 +- `data.showAllSubdistricts`: boolean,默认 `false`,是否展示全部下级行政区。 +- `data.subdistricts[]`: array,用于下钻各子区域,元素至少含 `name`,可附 `dataValue` 与 `style.fillColor`。 +- `width`: number,默认 `1600`,设置图宽。 +- `height`: number,默认 `1000`,设置图高。 + +## 使用建议 +名称必须精确到行政层级,避免模糊词;若配置 `subdistricts`,需同时开启 `showAllSubdistricts`;地图只支持中国境内且依赖高德数据。 + +## 返回结果 +- 返回地图图像 URL,并在 `_meta.spec` 中保留完整输入;若配置了 `SERVICE_ID`,生成记录会同步到“我的地图”小程序。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_dual_axes_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_dual_axes_chart.md new file mode 100644 index 00000000..a2b95544 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_dual_axes_chart.md @@ -0,0 +1,25 @@ +# generate_dual_axes_chart — 双轴图 + +## 功能概述 +在同一画布上叠加柱状与折线(或两条不同量纲曲线),用于同时展示趋势与对比,如营收 vs 利润、温度 vs 降雨。 + +## 输入字段 +### 必填 +- `categories`: string[],按顺序提供 X 轴刻度(如年份、月份、品类)。 +- `series`: array,每项至少包含 `type`(`column`/`line`)与 `data`(number[],长度与 `categories` 一致),可选 `axisYTitle`(string)描述该系列 Y 轴含义。 + +### 可选 +- `style.backgroundColor`: string,自定义背景色。 +- `style.palette`: string[],配置多系列配色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- 
`height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 + +## 使用建议 +仅在确有不同量纲或图例对比需求时使用;保持系列数量 ≤2 以免阅读复杂;若两曲线差值巨大可使用次坐标轴进行缩放。 + +## 返回结果 +- 返回双轴图图像 URL,并随 `_meta.spec` 给出详细参数。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_fishbone_diagram.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_fishbone_diagram.md new file mode 100644 index 00000000..0859852b --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_fishbone_diagram.md @@ -0,0 +1,20 @@ +# generate_fishbone_diagram — 鱼骨图 + +## 功能概述 +用于根因分析,将中心问题放在主干,左右分支展示不同类别的原因及其细化节点,常见于质量管理、流程优化。 + +## 输入字段 +### 必填 +- `data`: object,必填,至少提供根节点 `name`,可通过 `children`(array)递归拓展,最大建议 3 层。 + +### 可选 +- `style.texture`: string,默认 `default`,可选 `default`/`rough` 以切换线条风格。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 + +## 使用建议 +主干节点描述问题陈述;一级分支命名原因类别(人、机、料、法等);叶子节点写具体现象,保持短语式表达。 + +## 返回结果 +- 返回鱼骨图 URL,并在 `_meta.spec` 中保存树形结构,便于后续增删节点。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_flow_diagram.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_flow_diagram.md new file mode 100644 index 00000000..efed160c --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_flow_diagram.md @@ -0,0 +1,22 @@ +# generate_flow_diagram — 流程图 + +## 功能概述 +以节点和连线展示业务流程、审批链或算法步骤,支持开始/判断/操作等多种节点类型。 + +## 输入字段 +### 必填 +- `data`: object,必填,包含节点与连线定义。 +- `data.nodes`: array,至少 1 条,节点需提供唯一 `name`。 +- `data.edges`: array,至少 1 条,包含 `source` 与 `target`(string),可选 `name` 作为连线文本。 + +### 可选 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 + +## 使用建议 +先罗列节点 `name` 并保持唯一,再建立连线;若需要描述条件,可在 
`edges.name` 中填写;流程应保持单向或明确分支避免交叉。 + +## 返回结果 +- 返回流程图 URL,并携带 `_meta.spec` 中的节点与边数据,方便下次调整。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_funnel_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_funnel_chart.md new file mode 100644 index 00000000..feb2af87 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_funnel_chart.md @@ -0,0 +1,23 @@ +# generate_funnel_chart — 漏斗图 + +## 功能概述 +展示多阶段转化或流失情况,常用于销售管道、用户旅程等逐步筛选过程。 + +## 输入字段 +### 必填 +- `data`: array,需按流程顺序排列,每条包含 `category`(string)与 `value`(number)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义各阶段颜色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +阶段顺序需按实际流程排列;若数值为百分比应统一基准并在标题或备注中说明口径;避免阶段过多导致阅读困难(建议 ≤6)。 + +## 返回结果 +- 返回漏斗图 URL,并附 `_meta.spec` 方便复用。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_histogram_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_histogram_chart.md new file mode 100644 index 00000000..a081a8d5 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_histogram_chart.md @@ -0,0 +1,26 @@ +# generate_histogram_chart — 直方图 + +## 功能概述 +通过分箱显示连续数值的频数或概率分布,便于识别偏态、离群与集中区间。 + +## 输入字段 +### 必填 +- `data`: number[],至少 1 条,用于构建频数分布。 + +### 可选 +- `binNumber`: number,自定义分箱数量,未设置则自动估算。 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义柱体颜色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 
使用建议 +清理空值/异常后再传入;样本量建议 ≥30;根据业务意义调整 `binNumber` 以兼顾细节与整体趋势。 + +## 返回结果 +- 返回直方图 URL,并在 `_meta.spec` 存储参数。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_line_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_line_chart.md new file mode 100644 index 00000000..23b261ff --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_line_chart.md @@ -0,0 +1,26 @@ +# generate_line_chart — 折线图 + +## 功能概述 +展示时间或连续自变量的趋势,可支持多系列对比,适合 KPI 监控、指标预测、走势分析。 + +## 输入字段 +### 必填 +- `data`: array,每条包含 `time`(string)与 `value`(number),多系列时附带 `group`(string)。 + +### 可选 +- `style.lineWidth`: number,自定义折线线宽。 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],指定系列颜色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 使用建议 +所有系列的时间点应对齐;建议按 ISO 如 `2025-01-01` 或 `2025-W01` 格式化;对于高频数据可先聚合到日/周粒度避免过密。 + +## 返回结果 +- 返回折线图 URL,并附 `_meta.spec` 供后续编辑。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_liquid_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_liquid_chart.md new file mode 100644 index 00000000..5e01d256 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_liquid_chart.md @@ -0,0 +1,24 @@ +# generate_liquid_chart — 水波图 + +## 功能概述 +以液面高度展示单一百分比或进度,视觉动效强,适合达成率、资源占用等指标。 + +## 输入字段 +### 必填 +- `percent`: number,取值范围 [0,1],表示当前百分比或进度。 + +### 可选 +- `shape`: string,默认 `circle`,可选 `circle`/`rect`/`pin`/`triangle`。 +- `style.backgroundColor`: string,自定义背景色。 +- `style.color`: string,自定义水波颜色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 
`default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +确保百分比经过归一化;单图仅支持一个进度,如需多指标请并排生成多个水波图;标题可写“目标完成率 85%”。 + +## 返回结果 +- 返回水波图 URL,并在 `_meta.spec` 中记录参数。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_mind_map.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_mind_map.md new file mode 100644 index 00000000..f4fce637 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_mind_map.md @@ -0,0 +1,20 @@ +# generate_mind_map — 思维导图 + +## 功能概述 +围绕中心主题展开 2~3 级分支,帮助组织想法、计划或知识结构,常用于头脑风暴、方案规划。 + +## 输入字段 +### 必填 +- `data`: object,必填,节点至少含 `name`,可通过 `children`(array)递归扩展,建议深度 ≤3。 + +### 可选 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 + +## 使用建议 +中心节点写主题,一级分支代表主要维度(目标、资源、风险等),叶子节点使用短语;如分支较多,可先分拆多张导图。 + +## 返回结果 +- 返回思维导图 URL,并在 `_meta.spec` 中保留节点树以便后续优化。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_network_graph.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_network_graph.md new file mode 100644 index 00000000..c138070a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_network_graph.md @@ -0,0 +1,22 @@ +# generate_network_graph — 网络关系图 + +## 功能概述 +以节点与连线呈现实体之间的连接关系,适合社交网络、系统依赖、知识图谱等场景。 + +## 输入字段 +### 必填 +- `data`: object,必填,包含节点与连线。 +- `data.nodes`: array,至少 1 条,需提供唯一 `name`。 +- `data.edges`: array,至少 1 条,包含 `source` 与 `target`(string),可选 `name` 说明关系。 + +### 可选 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 + +## 使用建议 +节点数量保持在 10~50 之间以避免拥挤;确保 `edges` 中的 `source/target` 
对应已存在的节点;可在 `name` 中注明关系含义。
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pie_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pie_chart.md new file mode 100644 index 00000000..f13e2fb1 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pie_chart.md @@ -0,0 +1,24 @@ +# generate_pie_chart — 饼/环图 + +## 功能概述 +展示整体与部分的占比,可通过内径形成环图,适用于市场份额、预算构成、用户群划分等。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `category`(string)与 `value`(number)。 + +### 可选 +- `innerRadius`: number,范围 [0, 1],默认 `0`,设为 `0.6` 等值可生成环图。 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +类别数量建议 ≤6,若更多可聚合为“其它”;确保数值单位统一(百分比或绝对值),必要时在标题中说明基数。 + +## 返回结果 +- 返回饼/环图 URL,并附 `_meta.spec`。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pin_map.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pin_map.md new file mode 100644 index 00000000..ca48b2a4 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_pin_map.md @@ -0,0 +1,23 @@ +# generate_pin_map — 点标地图(中国) + +## 功能概述 +在中国地图上以标记展示多个 POI 位置,可配合弹窗显示图片或说明,适用于门店分布、资产布点等。 + +## 输入字段 +### 必填 +- `title`: string,必填且≤16 字,概述点位集合。 +- `data`: string[],必填,包含中国境内的 POI 名称列表。 + +### 可选 +- `markerPopup.type`: string,固定为 `image`。 +- `markerPopup.width`: number,默认 `40`,图片宽度。 +- `markerPopup.height`: number,默认 `40`,图片高度。 +- `markerPopup.borderRadius`: number,默认 `8`,图片圆角。 +- `width`: number,默认 `1600`。 +- `height`: number,默认 `1000`。 + +## 使用建议 +POI 名称需包含足够的地理限定(城市+地标);根据业务可在名称中附带属性,如“上海徐汇门店 A”;地图依赖高德数据,仅支持中国。 + +## 返回结果 +- 返回点标地图 URL,并在 `_meta.spec` 中保存点位与弹窗配置。 \ No newline at end of file diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_radar_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_radar_chart.md new file mode 100644 index 00000000..d41b02cb --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_radar_chart.md @@ -0,0 +1,24 @@ +# generate_radar_chart — 雷达图 + +## 功能概述 +在多维坐标系上比较单个对象或多对象的能力维度,常用于评测、产品对比、绩效画像。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `name`(string)与 `value`(number),可选 `group`(string)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.lineWidth`: number,设置雷达线宽。 +- `style.palette`: string[],定义系列颜色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +维度数量控制在 4~8 之间;不同对象通过 `group` 区分并保证同一维度都给出数值;如量纲不同需先归一化。 + +## 返回结果 +- 返回雷达图 URL,并附 `_meta.spec`。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_sankey_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_sankey_chart.md new file mode 100644 index 00000000..8a5e1ba9 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_sankey_chart.md @@ -0,0 +1,24 @@ +# generate_sankey_chart — 桑基图 + +## 功能概述 +展示资源、能量或用户流在不同节点之间的流向与数量,适合预算分配、流量路径、能耗分布等。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `source`(string)、`target`(string)与 `value`(number)。 + +### 可选 +- `nodeAlign`: string,默认 `center`,可选 `left`/`right`/`justify`/`center`。 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义节点配色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +节点名称保持唯一,避免过多交叉;如存在环路需先打平为阶段流向;可按阈值过滤小流量以聚焦重点。 + +## 返回结果 +- 返回桑基图 URL,并在 
`_meta.spec` 存放节点与流量定义。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_scatter_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_scatter_chart.md new file mode 100644 index 00000000..56c010b0 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_scatter_chart.md @@ -0,0 +1,25 @@ +# generate_scatter_chart — 散点图 + +## 功能概述 +展示两个连续变量之间的关系,可通过颜色/形状区分不同分组,适合相关性分析、聚类探索。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `x`(number)与 `y`(number),可选 `group`(string)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],指定系列配色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 使用建议 +在上传前可对不同量纲进行标准化;若数据量很大可先抽样;使用 `group` 区分不同类别或聚类结果以便阅读。 + +## 返回结果 +- 返回散点图 URL,并附 `_meta.spec`。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_spreadsheet.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_spreadsheet.md new file mode 100644 index 00000000..001ae80c --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_spreadsheet.md @@ -0,0 +1,24 @@ +# generate_spreadsheet — 电子表格/数据透视表 + +## 功能概述 +生成电子表格或数据透视表,用于展示结构化的表格数据。当提供 `rows` 或 `values` 字段时,渲染为数据透视表(交叉表);否则渲染为常规表格。适合展示结构化数据、跨类别比较值以及创建数据汇总。 + +## 输入字段 +### 必填 +- `data`: array,表格数据数组,每个对象代表一行。键是列名,值可以是字符串、数字、null 或 undefined。例如:`[{ name: 'John', age: 30 }, { name: 'Jane', age: 25 }]`。 + +### 可选 +- `rows`: array,数据透视表的行标题字段。当提供 `rows` 或 `values` 时,电子表格将渲染为数据透视表。 +- `columns`: array,列标题字段,用于指定列的顺序。对于常规表格,这决定列的顺序;对于数据透视表,用于列分组。 +- `values`: array,数据透视表的值字段。当提供 `rows` 或 `values` 时,电子表格将渲染为数据透视表。 +- `theme`: string,默认 `default`,可选 `default`/`dark`。 
+- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 + +## 使用建议 +- 对于常规表格,只需提供 `data` 和可选的 `columns` 来控制列的顺序。 +- 对于数据透视表(交叉表),提供 `rows` 用于行分组,`columns` 用于列分组,`values` 用于聚合的值字段。 +- 确保数据中的字段名与 `rows`、`columns`、`values` 中指定的字段名一致。 + +## 返回结果 +- 返回电子表格/数据透视表图片 URL,并附 `_meta.spec` 供后续编辑。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_treemap_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_treemap_chart.md new file mode 100644 index 00000000..186af04c --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_treemap_chart.md @@ -0,0 +1,23 @@ +# generate_treemap_chart — 矩形树图 + +## 功能概述 +以嵌套矩形展示层级结构及各节点权重,适合资产占比、市场份额、目录容量等。 + +## 输入字段 +### 必填 +- `data`: array,节点数组,每条含 `name`(string)与 `value`(number),可递归嵌套 `children`。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +确保每个节点 `value` ≥0,并与子节点之和一致;树层级不宜过深,可按需要提前聚合;为提升可读性可在节点名中加上数值单位。 + +## 返回结果 +- 返回矩形树图 URL,并同步 `_meta.spec`。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_venn_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_venn_chart.md new file mode 100644 index 00000000..91235c02 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_venn_chart.md @@ -0,0 +1,23 @@ +# generate_venn_chart — 维恩图 + +## 功能概述 +展示多个集合之间的交集、并集与差异,适用于市场细分、特性覆盖、用户重叠分析。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `value`(number)与 `sets`(string[]),可选 `label`(string)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: 
string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +集合数量建议 ≤4;若缺少精确权重可根据大致占比填写;集合命名保持简洁明确(如“移动端用户”)。 + +## 返回结果 +- 返回维恩图 URL,并保存在 `_meta.spec` 中。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_violin_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_violin_chart.md new file mode 100644 index 00000000..39825d77 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_violin_chart.md @@ -0,0 +1,25 @@ +# generate_violin_chart — 小提琴图 + +## 功能概述 +结合核密度曲线与箱型统计展示不同类别的分布形态,适合对比多批次实验或群体表现。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `category`(string)与 `value`(number),可选 `group`(string)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义配色列表。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 +- `axisXTitle`: string,默认空字符串。 +- `axisYTitle`: string,默认空字符串。 + +## 使用建议 +各类别样本量建议 ≥30 以确保密度估计稳定;如需要突出四分位信息,可与箱型图结合展示。 + +## 返回结果 +- 返回小提琴图 URL,并在 `_meta.spec` 中保留配置。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_word_cloud_chart.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_word_cloud_chart.md new file mode 100644 index 00000000..047d4938 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/references/generate_word_cloud_chart.md @@ -0,0 +1,23 @@ +# generate_word_cloud_chart — 词云图 + +## 功能概述 +根据词频或权重调节文字大小与位置,用于快速提炼文本主题、情绪或关键词热点。 + +## 输入字段 +### 必填 +- `data`: array,每条记录包含 `text`(string)与 `value`(number)。 + +### 可选 +- `style.backgroundColor`: string,设置背景色。 +- `style.palette`: string[],定义词云配色。 +- `style.texture`: string,默认 `default`,可选 `default`/`rough`。 +- `theme`: 
string,默认 `default`,可选 `default`/`academy`/`dark`。 +- `width`: number,默认 `600`。 +- `height`: number,默认 `400`。 +- `title`: string,默认空字符串。 + +## 使用建议 +生成前去除停用词并合并同义词;统一大小写避免重复;如需突出情绪可按正负值映射配色。 + +## 返回结果 +- 返回词云图 URL,并附 `_meta.spec`。 \ No newline at end of file diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/__init__.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/add_slide.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/add_slide.py new file mode 100644 index 00000000..13700df0 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/add_slide.py @@ -0,0 +1,195 @@ +"""Add a new slide to an unpacked PPTX directory. + +Usage: python add_slide.py + +The source can be: + - A slide file (e.g., slide2.xml) - duplicates the slide + - A layout file (e.g., slideLayout2.xml) - creates from layout + +Examples: + python add_slide.py unpacked/ slide2.xml + # Duplicates slide2, creates slide5.xml + + python add_slide.py unpacked/ slideLayout2.xml + # Creates slide5.xml from slideLayout2.xml + +To see available layouts: ls unpacked/ppt/slideLayouts/ + +Prints the element to add to presentation.xml. 
+""" + +import re +import shutil +import sys +from pathlib import Path + + +def get_next_slide_number(slides_dir: Path) -> int: + existing = [int(m.group(1)) for f in slides_dir.glob("slide*.xml") + if (m := re.match(r"slide(\d+)\.xml", f.name))] + return max(existing) + 1 if existing else 1 + + +def create_slide_from_layout(unpacked_dir: Path, layout_file: str) -> None: + slides_dir = unpacked_dir / "ppt" / "slides" + rels_dir = slides_dir / "_rels" + layouts_dir = unpacked_dir / "ppt" / "slideLayouts" + + layout_path = layouts_dir / layout_file + if not layout_path.exists(): + print(f"Error: {layout_path} not found", file=sys.stderr) + sys.exit(1) + + next_num = get_next_slide_number(slides_dir) + dest = f"slide{next_num}.xml" + dest_slide = slides_dir / dest + dest_rels = rels_dir / f"{dest}.rels" + + slide_xml = ''' + + + + + + + + + + + + + + + + + + + + + +''' + dest_slide.write_text(slide_xml, encoding="utf-8") + + rels_dir.mkdir(exist_ok=True) + rels_xml = f''' + + +''' + dest_rels.write_text(rels_xml, encoding="utf-8") + + _add_to_content_types(unpacked_dir, dest) + + rid = _add_to_presentation_rels(unpacked_dir, dest) + + next_slide_id = _get_next_slide_id(unpacked_dir) + + print(f"Created {dest} from {layout_file}") + print(f'Add to presentation.xml : ') + + +def duplicate_slide(unpacked_dir: Path, source: str) -> None: + slides_dir = unpacked_dir / "ppt" / "slides" + rels_dir = slides_dir / "_rels" + + source_slide = slides_dir / source + + if not source_slide.exists(): + print(f"Error: {source_slide} not found", file=sys.stderr) + sys.exit(1) + + next_num = get_next_slide_number(slides_dir) + dest = f"slide{next_num}.xml" + dest_slide = slides_dir / dest + + source_rels = rels_dir / f"{source}.rels" + dest_rels = rels_dir / f"{dest}.rels" + + shutil.copy2(source_slide, dest_slide) + + if source_rels.exists(): + shutil.copy2(source_rels, dest_rels) + + rels_content = dest_rels.read_text(encoding="utf-8") + rels_content = re.sub( + 
r'\s*]*Type="[^"]*notesSlide"[^>]*/>\s*', + "\n", + rels_content, + ) + dest_rels.write_text(rels_content, encoding="utf-8") + + _add_to_content_types(unpacked_dir, dest) + + rid = _add_to_presentation_rels(unpacked_dir, dest) + + next_slide_id = _get_next_slide_id(unpacked_dir) + + print(f"Created {dest} from {source}") + print(f'Add to presentation.xml : ') + + +def _add_to_content_types(unpacked_dir: Path, dest: str) -> None: + content_types_path = unpacked_dir / "[Content_Types].xml" + content_types = content_types_path.read_text(encoding="utf-8") + + new_override = f'' + + if f"/ppt/slides/{dest}" not in content_types: + content_types = content_types.replace("", f" {new_override}\n") + content_types_path.write_text(content_types, encoding="utf-8") + + +def _add_to_presentation_rels(unpacked_dir: Path, dest: str) -> str: + pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels" + pres_rels = pres_rels_path.read_text(encoding="utf-8") + + rids = [int(m) for m in re.findall(r'Id="rId(\d+)"', pres_rels)] + next_rid = max(rids) + 1 if rids else 1 + rid = f"rId{next_rid}" + + new_rel = f'' + + if f"slides/{dest}" not in pres_rels: + pres_rels = pres_rels.replace("", f" {new_rel}\n") + pres_rels_path.write_text(pres_rels, encoding="utf-8") + + return rid + + +def _get_next_slide_id(unpacked_dir: Path) -> int: + pres_path = unpacked_dir / "ppt" / "presentation.xml" + pres_content = pres_path.read_text(encoding="utf-8") + slide_ids = [int(m) for m in re.findall(r']*id="(\d+)"', pres_content)] + return max(slide_ids) + 1 if slide_ids else 256 + + +def parse_source(source: str) -> tuple[str, str | None]: + if source.startswith("slideLayout") and source.endswith(".xml"): + return ("layout", source) + + return ("slide", None) + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("Usage: python add_slide.py ", file=sys.stderr) + print("", file=sys.stderr) + print("Source can be:", file=sys.stderr) + print(" slide2.xml - duplicate an existing 
slide", file=sys.stderr) + print(" slideLayout2.xml - create from a layout template", file=sys.stderr) + print("", file=sys.stderr) + print("To see available layouts: ls /ppt/slideLayouts/", file=sys.stderr) + sys.exit(1) + + unpacked_dir = Path(sys.argv[1]) + source = sys.argv[2] + + if not unpacked_dir.exists(): + print(f"Error: {unpacked_dir} not found", file=sys.stderr) + sys.exit(1) + + source_type, layout_file = parse_source(source) + + if source_type == "layout" and layout_file is not None: + create_slide_from_layout(unpacked_dir, layout_file) + else: + duplicate_slide(unpacked_dir, source) diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/clean.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/clean.py new file mode 100644 index 00000000..3d13994c --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/clean.py @@ -0,0 +1,286 @@ +"""Remove unreferenced files from an unpacked PPTX directory. + +Usage: python clean.py + +Example: + python clean.py unpacked/ + +This script removes: +- Orphaned slides (not in sldIdLst) and their relationships +- [trash] directory (unreferenced files) +- Orphaned .rels files for deleted resources +- Unreferenced media, embeddings, charts, diagrams, drawings, ink files +- Unreferenced theme files +- Unreferenced notes slides +- Content-Type overrides for deleted files +""" + +import sys +from pathlib import Path + +import defusedxml.minidom + + +import re + + +def get_slides_in_sldidlst(unpacked_dir: Path) -> set[str]: + pres_path = unpacked_dir / "ppt" / "presentation.xml" + pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels" + + if not pres_path.exists() or not pres_rels_path.exists(): + return set() + + rels_dom = defusedxml.minidom.parse(str(pres_rels_path)) + rid_to_slide = {} + for rel in rels_dom.getElementsByTagName("Relationship"): + rid = rel.getAttribute("Id") + target = rel.getAttribute("Target") + rel_type = rel.getAttribute("Type") + if 
"slide" in rel_type and target.startswith("slides/"): + rid_to_slide[rid] = target.replace("slides/", "") + + pres_content = pres_path.read_text(encoding="utf-8") + referenced_rids = set(re.findall(r']*r:id="([^"]+)"', pres_content)) + + return {rid_to_slide[rid] for rid in referenced_rids if rid in rid_to_slide} + + +def remove_orphaned_slides(unpacked_dir: Path) -> list[str]: + slides_dir = unpacked_dir / "ppt" / "slides" + slides_rels_dir = slides_dir / "_rels" + pres_rels_path = unpacked_dir / "ppt" / "_rels" / "presentation.xml.rels" + + if not slides_dir.exists(): + return [] + + referenced_slides = get_slides_in_sldidlst(unpacked_dir) + removed = [] + + for slide_file in slides_dir.glob("slide*.xml"): + if slide_file.name not in referenced_slides: + rel_path = slide_file.relative_to(unpacked_dir) + slide_file.unlink() + removed.append(str(rel_path)) + + rels_file = slides_rels_dir / f"{slide_file.name}.rels" + if rels_file.exists(): + rels_file.unlink() + removed.append(str(rels_file.relative_to(unpacked_dir))) + + if removed and pres_rels_path.exists(): + rels_dom = defusedxml.minidom.parse(str(pres_rels_path)) + changed = False + + for rel in list(rels_dom.getElementsByTagName("Relationship")): + target = rel.getAttribute("Target") + if target.startswith("slides/"): + slide_name = target.replace("slides/", "") + if slide_name not in referenced_slides: + if rel.parentNode: + rel.parentNode.removeChild(rel) + changed = True + + if changed: + with open(pres_rels_path, "wb") as f: + f.write(rels_dom.toxml(encoding="utf-8")) + + return removed + + +def remove_trash_directory(unpacked_dir: Path) -> list[str]: + trash_dir = unpacked_dir / "[trash]" + removed = [] + + if trash_dir.exists() and trash_dir.is_dir(): + for file_path in trash_dir.iterdir(): + if file_path.is_file(): + rel_path = file_path.relative_to(unpacked_dir) + removed.append(str(rel_path)) + file_path.unlink() + trash_dir.rmdir() + + return removed + + +def 
def _collect_rel_targets(rels_file: Path, unpacked_root: Path, into: set) -> None:
    """Add every resolvable relationship target of *rels_file* to *into*.

    Targets are recorded as paths relative to *unpacked_root*; targets
    resolving outside the package (e.g. external hyperlinks) are skipped.
    """
    dom = defusedxml.minidom.parse(str(rels_file))
    for rel in dom.getElementsByTagName("Relationship"):
        target = rel.getAttribute("Target")
        if not target:
            continue
        resolved = (rels_file.parent.parent / target).resolve()
        try:
            into.add(resolved.relative_to(unpacked_root.resolve()))
        except ValueError:
            pass  # outside the unpacked tree


def get_slide_referenced_files(unpacked_dir: Path) -> set:
    """Relative paths of every resource referenced from slide .rels files."""
    referenced: set = set()
    slides_rels_dir = unpacked_dir / "ppt" / "slides" / "_rels"
    if slides_rels_dir.exists():
        for rels_file in slides_rels_dir.glob("*.rels"):
            _collect_rel_targets(rels_file, unpacked_dir, referenced)
    return referenced


def remove_orphaned_rels_files(unpacked_dir: Path) -> list[str]:
    """Delete .rels files for chart/diagram/drawing parts that are gone
    or no longer referenced by any slide.  Returns removed relative paths."""
    removed: list[str] = []
    slide_referenced = get_slide_referenced_files(unpacked_dir)

    for dir_name in ("charts", "diagrams", "drawings"):
        rels_dir = unpacked_dir / "ppt" / dir_name / "_rels"
        if not rels_dir.exists():
            continue

        for rels_file in rels_dir.glob("*.rels"):
            resource_file = rels_dir.parent / rels_file.name.replace(".rels", "")
            try:
                resource_rel = resource_file.resolve().relative_to(unpacked_dir.resolve())
            except ValueError:
                continue

            if not resource_file.exists() or resource_rel not in slide_referenced:
                rels_file.unlink()
                removed.append(str(rels_file.relative_to(unpacked_dir)))

    return removed


def get_referenced_files(unpacked_dir: Path) -> set:
    """Relative paths of every target referenced by any .rels in the package."""
    referenced: set = set()
    for rels_file in unpacked_dir.rglob("*.rels"):
        _collect_rel_targets(rels_file, unpacked_dir, referenced)
    return referenced


def remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[str]:
    """Delete files under known resource directories that nothing references.

    Covers the plain resource folders, theme parts (plus their .rels)
    and notes slides (plus stale notes .rels).  Returns removed paths
    relative to *unpacked_dir*.
    """
    removed: list[str] = []

    def _unlink(path: Path) -> None:
        path.unlink()
        removed.append(str(path.relative_to(unpacked_dir)))

    for dir_name in ("media", "embeddings", "charts", "diagrams", "tags", "drawings", "ink"):
        dir_path = unpacked_dir / "ppt" / dir_name
        if not dir_path.exists():
            continue
        for candidate in dir_path.glob("*"):
            if candidate.is_file() and candidate.relative_to(unpacked_dir) not in referenced:
                _unlink(candidate)

    theme_dir = unpacked_dir / "ppt" / "theme"
    if theme_dir.exists():
        for theme_file in theme_dir.glob("theme*.xml"):
            if theme_file.relative_to(unpacked_dir) in referenced:
                continue
            _unlink(theme_file)
            theme_rels = theme_dir / "_rels" / f"{theme_file.name}.rels"
            if theme_rels.exists():
                _unlink(theme_rels)

    notes_dir = unpacked_dir / "ppt" / "notesSlides"
    if notes_dir.exists():
        for note_file in notes_dir.glob("*.xml"):
            if note_file.is_file() and note_file.relative_to(unpacked_dir) not in referenced:
                _unlink(note_file)

        notes_rels_dir = notes_dir / "_rels"
        if notes_rels_dir.exists():
            # A notes .rels is stale once its notes slide is gone.
            for rels_file in notes_rels_dir.glob("*.rels"):
                if not (notes_dir / rels_file.name.replace(".rels", "")).exists():
                    _unlink(rels_file)

    return removed


def update_content_types(unpacked_dir: Path, removed_files: list[str]) -> None:
    """Drop <Override> entries whose PartName matches a removed file."""
    ct_path = unpacked_dir / "[Content_Types].xml"
    if not ct_path.exists():
        return

    dom = defusedxml.minidom.parse(str(ct_path))
    doomed = [
        override
        for override in dom.getElementsByTagName("Override")
        if override.getAttribute("PartName").lstrip("/") in removed_files
    ]
    for override in doomed:
        if override.parentNode:
            override.parentNode.removeChild(override)

    if doomed:
        with open(ct_path, "wb") as f:
            f.write(dom.toxml(encoding="utf-8"))
def clean_unused_files(unpacked_dir: Path) -> list[str]:
    """Run all cleanup passes and return every removed relative path.

    Orphaned slides and ``[trash]`` are removed once up front; the
    rels/resource passes repeat until a full pass removes nothing,
    because deleting one resource can orphan another.
    """
    all_removed: list[str] = []
    all_removed.extend(remove_orphaned_slides(unpacked_dir))
    all_removed.extend(remove_trash_directory(unpacked_dir))

    while True:
        batch = remove_orphaned_rels_files(unpacked_dir)
        batch += remove_orphaned_files(unpacked_dir, get_referenced_files(unpacked_dir))
        if not batch:
            break
        all_removed.extend(batch)

    if all_removed:
        update_content_types(unpacked_dir, all_removed)

    return all_removed


if __name__ == "__main__":
    # NOTE(review): the usage string below appears to have lost its
    # "<unpacked_dir>" placeholder during extraction; left byte-for-byte.
    if len(sys.argv) != 2:
        print("Usage: python clean.py ", file=sys.stderr)
        print("Example: python clean.py unpacked/", file=sys.stderr)
        sys.exit(1)

    target_dir = Path(sys.argv[1])
    if not target_dir.exists():
        print(f"Error: {target_dir} not found", file=sys.stderr)
        sys.exit(1)

    removed_paths = clean_unused_files(target_dir)
    if removed_paths:
        print(f"Removed {len(removed_paths)} unreferenced files:")
        for entry in removed_paths:
            print(f"  {entry}")
    else:
        print("No unreferenced files found")
#!/usr/bin/env node

const fs = require("fs");

// Chart type mapping, consistent with src/utils/callTool.ts
const CHART_TYPE_MAP = {
  generate_area_chart: "area",
  generate_bar_chart: "bar",
  generate_boxplot_chart: "boxplot",
  generate_column_chart: "column",
  generate_district_map: "district-map",
  generate_dual_axes_chart: "dual-axes",
  generate_fishbone_diagram: "fishbone-diagram",
  generate_flow_diagram: "flow-diagram",
  generate_funnel_chart: "funnel",
  generate_histogram_chart: "histogram",
  generate_line_chart: "line",
  generate_liquid_chart: "liquid",
  generate_mind_map: "mind-map",
  generate_network_graph: "network-graph",
  generate_organization_chart: "organization-chart",
  generate_path_map: "path-map",
  generate_pie_chart: "pie",
  generate_pin_map: "pin-map",
  generate_radar_chart: "radar",
  generate_sankey_chart: "sankey",
  generate_scatter_chart: "scatter",
  generate_treemap_chart: "treemap",
  generate_venn_chart: "venn",
  generate_violin_chart: "violin",
  generate_word_cloud_chart: "word-cloud",
};

// Map-producing tools go through the serviceId-based endpoint instead of
// the plain chart endpoint.
const MAP_TOOLS = new Set([
  "generate_district_map",
  "generate_path_map",
  "generate_pin_map",
]);

function getVisRequestServer() {
  return (
    process.env.VIS_REQUEST_SERVER ||
    "https://antv-studio.alipay.com/api/gpt-vis"
  );
}

function getServiceIdentifier() {
  return process.env.SERVICE_ID;
}

// POST a JSON payload and return the parsed JSON response; any non-2xx
// status is surfaced as an Error carrying the response body.
async function httpPost(url, payload) {
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const text = await response.text();
    throw new Error(`HTTP ${response.status}: ${text}`);
  }

  return response.json();
}

// Request a rendered chart URL for a non-map chart type.
async function generateChartUrl(chartType, options) {
  const payload = {
    type: chartType,
    source: "chart-visualization-creator",
    ...options,
  };

  const data = await httpPost(getVisRequestServer(), payload);
  if (!data.success) {
    throw new Error(data.errorMessage || "Unknown error");
  }
  return data.resultObj;
}

// Request a map result (district/path/pin) via the tool-dispatch endpoint.
async function generateMap(tool, inputData) {
  const payload = {
    serviceId: getServiceIdentifier(),
    tool,
    input: inputData,
    source: "chart-visualization-creator",
  };

  const data = await httpPost(getVisRequestServer(), payload);
  if (!data.success) {
    throw new Error(data.errorMessage || "Unknown error");
  }
  return data.resultObj;
}

// A spec argument may be a path to a JSON file or an inline JSON string.
function readSpec(specArg) {
  const text = fs.existsSync(specArg)
    ? fs.readFileSync(specArg, "utf-8")
    : specArg;
  return JSON.parse(text);
}

async function main() {
  if (process.argv.length < 3) {
    // NOTE(review): usage placeholder looks stripped in extraction; kept as-is.
    console.error("Usage: node generate.js ");
    process.exit(1);
  }

  let spec;
  try {
    spec = readSpec(process.argv[2]);
  } catch (e) {
    console.error(`Error parsing spec: ${e.message}`);
    process.exit(1);
  }

  const specs = Array.isArray(spec) ? spec : [spec];

  for (const item of specs) {
    const tool = item.tool;
    const args = item.args || {};

    if (!tool) {
      console.error(
        `Error: 'tool' field missing in spec: ${JSON.stringify(item)}`,
      );
      continue;
    }

    const chartType = CHART_TYPE_MAP[tool];
    if (!chartType) {
      console.error(`Error: Unknown tool '${tool}'`);
      continue;
    }

    try {
      if (MAP_TOOLS.has(tool)) {
        const result = await generateMap(tool, args);
        if (result && result.content) {
          for (const contentItem of result.content) {
            if (contentItem.type === "text") {
              console.log(contentItem.text);
            }
          }
        } else {
          console.log(JSON.stringify(result));
        }
      } else {
        console.log(await generateChartUrl(chartType, args));
      }
    } catch (e) {
      console.error(`Error generating chart for ${tool}: ${e.message}`);
    }
  }
}

if (require.main === module) {
  main().catch((err) => {
    console.error(err.message);
    process.exit(1);
  });
}

// Export functions for testing
module.exports = { generateChartUrl, generateMap, httpPost, CHART_TYPE_MAP };
"""Merge adjacent runs with identical formatting in DOCX.

Merges adjacent run elements that have identical run properties.
Works on runs in paragraphs and inside tracked changes (ins, del).

Also:
- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
- Removes proofErr elements (spell/grammar markers that block merging)
"""

from pathlib import Path

# Explicit star-import surface; the private helpers are exported too so
# they can be unit-tested in isolation.
__all__ = [
    "merge_runs",
    "_find_elements", "_get_child", "_get_children", "_matches",
    "_is_adjacent", "_remove_elements", "_strip_run_rsid_attrs",
    "_merge_runs_in", "_first_child_run", "_next_element_sibling",
    "_next_sibling_run", "_is_run", "_can_merge", "_merge_run_content",
    "_consolidate_text",
]


def merge_runs(input_dir: str) -> tuple[int, str]:
    """Merge mergeable runs in <input_dir>/word/document.xml in place.

    Returns ``(merge_count, message)``; on any failure the count is 0
    and the message describes the error.
    """
    doc_xml = Path(input_dir) / "word" / "document.xml"
    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    # Imported where used so the pure DOM helpers below stay importable
    # without the defusedxml dependency.
    import defusedxml.minidom

    try:
        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
        root = dom.documentElement

        _remove_elements(root, "proofErr")
        _strip_run_rsid_attrs(root)

        # De-duplicate parents so each container is merged exactly once.
        parents = {run.parentNode for run in _find_elements(root, "r")}
        merge_count = sum(_merge_runs_in(parent) for parent in parents)

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Merged {merge_count} runs"

    except Exception as e:  # report, never raise: callers expect a tuple
        return 0, f"Error: {e}"


def _matches(node, tag: str) -> bool:
    """True when the element's (possibly prefixed) name equals *tag*."""
    name = node.localName or node.tagName
    return name == tag or name.endswith(":" + tag)


def _find_elements(root, tag: str) -> list:
    """Depth-first list of every descendant element matching *tag*."""
    found = []

    def walk(node):
        if node.nodeType == node.ELEMENT_NODE and _matches(node, tag):
            found.append(node)
        for kid in node.childNodes:
            walk(kid)

    walk(root)
    return found


def _get_child(parent, tag: str):
    """First direct child element matching *tag*, or None."""
    for kid in parent.childNodes:
        if kid.nodeType == kid.ELEMENT_NODE and _matches(kid, tag):
            return kid
    return None


def _get_children(parent, tag: str) -> list:
    """All direct child elements matching *tag*, in document order."""
    return [
        kid
        for kid in parent.childNodes
        if kid.nodeType == kid.ELEMENT_NODE and _matches(kid, tag)
    ]


def _is_adjacent(first, second) -> bool:
    """True when only ignorable whitespace separates *first* and *second*."""
    node = first.nextSibling
    while node:
        if node is second:
            return True
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling
    return False


def _remove_elements(root, tag: str):
    """Detach every element matching *tag* from the tree."""
    for doomed in _find_elements(root, tag):
        parent = doomed.parentNode
        if parent:
            parent.removeChild(doomed)


def _strip_run_rsid_attrs(root):
    """Drop every rsid* attribute from run elements (pure revision noise)."""
    for run in _find_elements(root, "r"):
        stale = [a.name for a in run.attributes.values() if "rsid" in a.name.lower()]
        for attr_name in stale:
            run.removeAttribute(attr_name)


def _merge_runs_in(container) -> int:
    """Merge mergeable sibling runs inside one container; return the count."""
    merged = 0
    current = _first_child_run(container)

    while current is not None:
        # Absorb as many immediately-following compatible runs as possible.
        neighbour = _next_element_sibling(current)
        while (
            neighbour is not None
            and _is_run(neighbour)
            and _can_merge(current, neighbour)
        ):
            _merge_run_content(current, neighbour)
            container.removeChild(neighbour)
            merged += 1
            neighbour = _next_element_sibling(current)

        _consolidate_text(current)
        current = _next_sibling_run(current)

    return merged


def _first_child_run(container):
    """First direct child that is a run element, or None."""
    for kid in container.childNodes:
        if kid.nodeType == kid.ELEMENT_NODE and _is_run(kid):
            return kid
    return None


def _next_element_sibling(node):
    """Next sibling that is an element, skipping text/comment nodes."""
    node = node.nextSibling
    while node is not None and node.nodeType != node.ELEMENT_NODE:
        node = node.nextSibling
    return node


def _next_sibling_run(node):
    """Next sibling element that is a run, or None."""
    node = node.nextSibling
    while node is not None:
        if node.nodeType == node.ELEMENT_NODE and _is_run(node):
            return node
        node = node.nextSibling
    return None


def _is_run(node) -> bool:
    """True for run (``r``) elements, with or without a namespace prefix."""
    return _matches(node, "r")


def _can_merge(left, right) -> bool:
    """Runs merge when their rPr blocks are both absent or identical XML."""
    left_props = _get_child(left, "rPr")
    right_props = _get_child(right, "rPr")

    if left_props is None and right_props is None:
        return True
    if left_props is None or right_props is None:
        return False
    return left_props.toxml() == right_props.toxml()


def _merge_run_content(target, source):
    """Move every non-rPr child element of *source* into *target*."""
    for child in list(source.childNodes):
        if child.nodeType == child.ELEMENT_NODE and not _matches(child, "rPr"):
            target.appendChild(child)


def _consolidate_text(run):
    """Collapse adjacent text (``t``) children into one, fixing xml:space."""
    texts = _get_children(run, "t")

    for idx in range(len(texts) - 1, 0, -1):
        curr = texts[idx]
        prev = texts[idx - 1]
        if not _is_adjacent(prev, curr):
            continue

        combined = (prev.firstChild.data if prev.firstChild else "") + (
            curr.firstChild.data if curr.firstChild else ""
        )
        if prev.firstChild:
            prev.firstChild.data = combined
        else:
            prev.appendChild(run.ownerDocument.createTextNode(combined))

        # Leading/trailing spaces are only honoured with xml:space="preserve".
        if combined.startswith(" ") or combined.endswith(" "):
            prev.setAttribute("xml:space", "preserve")
        elif prev.hasAttribute("xml:space"):
            prev.removeAttribute("xml:space")

        run.removeChild(curr)
"""Simplify tracked changes by merging adjacent w:ins or w:del elements.

Merges adjacent insertion wrappers from the same author into a single
element, and likewise for deletion wrappers.  This makes heavily-redlined
documents easier to work with by reducing the number of tracked change
wrappers.

Rules:
- Only merges w:ins with w:ins, w:del with w:del (same element type)
- Only merges if same author (ignores timestamp differences)
- Only merges if truly adjacent (only whitespace between them)
"""

import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent same-author tracked changes in word/document.xml.

    Returns ``(merge_count, message)``; failures yield ``(0, error)``.
    """
    doc_xml = Path(input_dir) / "word" / "document.xml"
    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    # Imported where used so the DOM helpers stay importable without
    # the defusedxml dependency.
    import defusedxml.minidom

    try:
        dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
        root = dom.documentElement

        total = 0
        # Paragraphs and table cells are the containers examined.
        for holder in _find_elements(root, "p") + _find_elements(root, "tc"):
            total += _merge_tracked_changes_in(holder, "ins")
            total += _merge_tracked_changes_in(holder, "del")

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return total, f"Simplified {total} tracked changes"

    except Exception as e:  # callers expect a tuple, never an exception
        return 0, f"Error: {e}"


def _merge_tracked_changes_in(container, tag: str) -> int:
    """Merge adjacent same-author *tag* wrappers among *container*'s children."""
    wrappers = [
        kid
        for kid in container.childNodes
        if kid.nodeType == kid.ELEMENT_NODE and _is_element(kid, tag)
    ]
    if len(wrappers) < 2:
        return 0

    merged = 0
    idx = 0
    while idx < len(wrappers) - 1:
        left, right = wrappers[idx], wrappers[idx + 1]
        if _can_merge_tracked(left, right):
            _merge_tracked_content(left, right)
            container.removeChild(right)
            del wrappers[idx + 1]
            merged += 1
        else:
            idx += 1
    return merged


def _is_element(node, tag: str) -> bool:
    """True when the node's (possibly prefixed) element name equals *tag*."""
    name = node.localName or node.tagName
    return name == tag or name.endswith(":" + tag)
def _can_merge_tracked(elem1, elem2) -> bool:
    """Two wrappers merge when they share an author and only whitespace
    separates them in the document."""
    if _get_author(elem1) != _get_author(elem2):
        return False

    node = elem1.nextSibling
    while node and node != elem2:
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling

    return True


def _merge_tracked_content(target, source):
    """Move every child of *source* to the end of *target*, in order."""
    while source.firstChild:
        child = source.firstChild
        source.removeChild(child)
        target.appendChild(child)


def _find_elements(root, tag: str) -> list:
    """Depth-first list of descendant elements named *tag* (any prefix)."""
    results = []

    def traverse(node):
        if node.nodeType == node.ELEMENT_NODE:
            name = node.localName or node.tagName
            if name == tag or name.endswith(f":{tag}"):
                results.append(node)
        for child in node.childNodes:
            traverse(child)

    traverse(root)
    return results


def _count_authors(root) -> dict[str, int]:
    """Count w:ins / w:del tracked changes per author under an
    ElementTree root.

    Shared by the file- and zip-based counters below (previously two
    identical loops); the namespace is inlined so the helper works
    standalone.
    """
    ns = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    author_attr = f"{{{ns}}}author"
    authors: dict[str, int] = {}
    for tag in ("ins", "del"):
        for elem in root.findall(f".//w:{tag}", {"w": ns}):
            author = elem.get(author_attr)
            if author:
                authors[author] = authors.get(author, 0) + 1
    return authors


def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Map author -> number of tracked changes in *doc_xml_path*.

    Missing or unparseable files yield an empty mapping.
    """
    if not doc_xml_path.exists():
        return {}
    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}
    return _count_authors(root)


def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
    """Map author -> tracked-change count inside a packed .docx.

    Any missing, unreadable or corrupt archive yields an empty mapping,
    so a bad original just means "no baseline" rather than a crash.
    """
    try:
        with zipfile.ZipFile(docx_path, "r") as zf:
            if "word/document.xml" not in zf.namelist():
                return {}
            with zf.open("word/document.xml") as f:
                root = ET.parse(f).getroot()
        return _count_authors(root)
    # OSError added: previously a nonexistent original_docx raised
    # FileNotFoundError out of infer_author instead of degrading like
    # get_tracked_change_authors does.
    except (OSError, zipfile.BadZipFile, ET.ParseError):
        return {}


def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Infer which author added the new tracked changes.

    Compares per-author change counts in the modified (unpacked)
    document against the original .docx.  Returns *default* when there
    are no changes (or none beyond the original); raises ValueError
    when several authors added new changes, since validation then has
    no single author to target.
    """
    modified_authors = get_tracked_change_authors(modified_dir / "word" / "document.xml")

    if not modified_authors:
        return default

    original_authors = _get_authors_from_docx(original_docx)

    new_changes: dict[str, int] = {}
    for author, count in modified_authors.items():
        added = count - original_authors.get(author, 0)
        if added > 0:
            new_changes[author] = added

    if not new_changes:
        return default

    if len(new_changes) == 1:
        return next(iter(new_changes))

    raise ValueError(
        f"Multiple authors added new changes: {new_changes}. "
        "Cannot infer which author to validate."
    )
+ +Usage: + python pack.py [--original ] [--validate true|false] + +Examples: + python pack.py unpacked/ output.docx --original input.docx + python pack.py unpacked/ output.pptx --validate false +""" + +import argparse +import sys +import shutil +import tempfile +import zipfile +from pathlib import Path + +import defusedxml.minidom + +from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator + +def pack( + input_directory: str, + output_file: str, + original_file: str | None = None, + validate: bool = True, + infer_author_func=None, +) -> tuple[None, str]: + input_dir = Path(input_directory) + output_path = Path(output_file) + suffix = output_path.suffix.lower() + + if not input_dir.is_dir(): + return None, f"Error: {input_dir} is not a directory" + + if suffix not in {".docx", ".pptx", ".xlsx"}: + return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file" + + if validate and original_file: + original_path = Path(original_file) + if original_path.exists(): + success, output = _run_validation( + input_dir, original_path, suffix, infer_author_func + ) + if output: + print(output) + if not success: + return None, f"Error: Validation failed for {input_dir}" + + with tempfile.TemporaryDirectory() as temp_dir: + temp_content_dir = Path(temp_dir) / "content" + shutil.copytree(input_dir, temp_content_dir) + + for pattern in ["*.xml", "*.rels"]: + for xml_file in temp_content_dir.rglob(pattern): + _condense_xml(xml_file) + + output_path.parent.mkdir(parents=True, exist_ok=True) + with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf: + for f in temp_content_dir.rglob("*"): + if f.is_file(): + zf.write(f, f.relative_to(temp_content_dir)) + + return None, f"Successfully packed {input_dir} to {output_file}" + + +def _run_validation( + unpacked_dir: Path, + original_file: Path, + suffix: str, + infer_author_func=None, +) -> tuple[bool, str | None]: + output_lines = [] + validators = [] + + if suffix == ".docx": + author = 
"Claude" + if infer_author_func: + try: + author = infer_author_func(unpacked_dir, original_file) + except ValueError as e: + print(f"Warning: {e} Using default author 'Claude'.", file=sys.stderr) + + validators = [ + DOCXSchemaValidator(unpacked_dir, original_file), + RedliningValidator(unpacked_dir, original_file, author=author), + ] + elif suffix == ".pptx": + validators = [PPTXSchemaValidator(unpacked_dir, original_file)] + + if not validators: + return True, None + + total_repairs = sum(v.repair() for v in validators) + if total_repairs: + output_lines.append(f"Auto-repaired {total_repairs} issue(s)") + + success = all(v.validate() for v in validators) + + if success: + output_lines.append("All validations PASSED!") + + return success, "\n".join(output_lines) if output_lines else None + + +def _condense_xml(xml_file: Path) -> None: + try: + with open(xml_file, encoding="utf-8") as f: + dom = defusedxml.minidom.parse(f) + + for element in dom.getElementsByTagName("*"): + if element.tagName.endswith(":t"): + continue + + for child in list(element.childNodes): + if ( + child.nodeType == child.TEXT_NODE + and child.nodeValue + and child.nodeValue.strip() == "" + ) or child.nodeType == child.COMMENT_NODE: + element.removeChild(child) + + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + except Exception as e: + print(f"ERROR: Failed to parse {xml_file.name}: {e}", file=sys.stderr) + raise + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Pack a directory into a DOCX, PPTX, or XLSX file" + ) + parser.add_argument("input_directory", help="Unpacked Office document directory") + parser.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)") + parser.add_argument( + "--original", + help="Original file for validation comparison", + ) + parser.add_argument( + "--validate", + type=lambda x: x.lower() == "true", + default=True, + metavar="true|false", + help="Run validation with auto-repair (default: true)", + ) + args = 
parser.parse_args() + + _, message = pack( + args.input_directory, + args.output_file, + original_file=args.original, + validate=args.validate, + ) + print(message) + + if "Error" in message: + sys.exit(1) diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd new file mode 100644 index 00000000..6454ef9a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd @@ -0,0 +1,1499 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd new file mode 100644 index 00000000..afa4f463 --- /dev/null +++ 
b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd @@ -0,0 +1,146 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd new file mode 100644 index 00000000..64e66b8a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd @@ -0,0 +1,1085 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd new file mode 100644 index 00000000..687eea82 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd @@ -0,0 +1,11 @@ + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd new file mode 100644 index 00000000..6ac81b06 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd @@ -0,0 +1,3081 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd new file mode 100644 index 00000000..1dbf0514 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd new file mode 100644 index 00000000..f1af17db --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd new file mode 100644 index 00000000..0a185ab6 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd @@ -0,0 +1,287 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd new file mode 100644 index 00000000..14ef4888 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd @@ -0,0 +1,1676 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd new file mode 100644 index 00000000..c20f3bf1 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd new file mode 100644 index 00000000..ac602522 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd @@ -0,0 +1,144 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd new file mode 100644 index 00000000..424b8ba8 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd new file mode 100644 index 00000000..2bddce29 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd new file mode 100644 index 00000000..8a8c18ba --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd new file mode 100644 index 00000000..5c42706a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd new file mode 100644 index 
00000000..853c341c --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd new file mode 100644 index 00000000..da835ee8 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd @@ -0,0 +1,195 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd new file mode 100644 index 00000000..87ad2658 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd @@ -0,0 +1,582 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd new file mode 100644 index 00000000..9e86f1b2 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd new file mode 100644 index 00000000..d0be42e7 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd @@ -0,0 +1,4439 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd new file mode 100644 index 00000000..8821dd18 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd @@ -0,0 +1,570 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd new file mode 100644 index 00000000..ca2575c7 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd @@ -0,0 +1,509 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd new file mode 100644 index 00000000..dd079e60 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd @@ -0,0 +1,12 @@ + + + + + + + + + diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd new file mode 100644 index 00000000..3dd6cf62 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd new file mode 100644 index 00000000..f1041e34 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd new file mode 100644 index 00000000..9c5b7a63 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd @@ -0,0 +1,3646 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd new file mode 100644 index 00000000..0f13678d --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd @@ -0,0 +1,116 @@ + + + + + + See http://www.w3.org/XML/1998/namespace.html and + http://www.w3.org/TR/REC-xml for information about this namespace. + + This schema document describes the XML namespace, in a form + suitable for import by other schema documents. + + Note that local names in this namespace are intended to be defined + only by the World Wide Web Consortium or its subgroups. 
The + following names are currently defined in this namespace and should + not be used with conflicting semantics by any Working Group, + specification, or document instance: + + base (as an attribute name): denotes an attribute whose value + provides a URI to be used as the base for interpreting any + relative URIs in the scope of the element on which it + appears; its value is inherited. This name is reserved + by virtue of its definition in the XML Base specification. + + lang (as an attribute name): denotes an attribute whose value + is a language code for the natural language of the content of + any element; its value is inherited. This name is reserved + by virtue of its definition in the XML specification. + + space (as an attribute name): denotes an attribute whose + value is a keyword indicating what whitespace processing + discipline is intended for the content of the element; its + value is inherited. This name is reserved by virtue of its + definition in the XML specification. + + Father (in any context at all): denotes Jon Bosak, the chair of + the original XML Working Group. This name is reserved by + the following decision of the W3C XML Plenary and + XML Coordination groups: + + In appreciation for his vision, leadership and dedication + the W3C XML Plenary on this 10th day of February, 2000 + reserves for Jon Bosak in perpetuity the XML name + xml:Father + + + + + This schema defines attributes and an attribute group + suitable for use by + schemas wishing to allow xml:base, xml:lang or xml:space attributes + on elements they define. + + To enable this, such a schema must import this schema + for the XML namespace, e.g. as follows: + <schema . . .> + . . . + <import namespace="http://www.w3.org/XML/1998/namespace" + schemaLocation="http://www.w3.org/2001/03/xml.xsd"/> + + Subsequently, qualified reference to any of the attributes + or the group defined below will have the desired effect, e.g. + + <type . . .> + . . . 
+ <attributeGroup ref="xml:specialAttrs"/> + + will define a type which will schema-validate an instance + element with any of those attributes + + + + In keeping with the XML Schema WG's standard versioning + policy, this schema document will persist at + http://www.w3.org/2001/03/xml.xsd. + At the date of issue it can also be found at + http://www.w3.org/2001/xml.xsd. + The schema document at that URI may however change in the future, + in order to remain compatible with the latest version of XML Schema + itself. In other words, if the XML Schema namespace changes, the version + of this document at + http://www.w3.org/2001/xml.xsd will change + accordingly; the version at + http://www.w3.org/2001/03/xml.xsd will not change. + + + + + + In due course, we should install the relevant ISO 2- and 3-letter + codes as the enumerated possible values . . . + + + + + + + + + + + + + + + See http://www.w3.org/TR/xmlbase/ for + information about this attribute. + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd new file mode 100644 index 00000000..a6de9d27 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd new file mode 100644 index 00000000..10e978b6 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd new file mode 100644 index 00000000..4248bf7a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd new file mode 100644 index 00000000..56497467 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/mce/mc.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/mce/mc.xsd new file mode 100644 index 00000000..ef725457 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/mce/mc.xsd @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2010.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2010.xsd new file mode 100644 index 00000000..f65f7777 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2010.xsd @@ -0,0 +1,560 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2012.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2012.xsd new file mode 100644 index 00000000..6b00755a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2012.xsd @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2018.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2018.xsd new file mode 100644 index 00000000..f321d333 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-2018.xsd @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git 
a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cex-2018.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cex-2018.xsd new file mode 100644 index 00000000..364c6a9b --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cex-2018.xsd @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cid-2016.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cid-2016.xsd new file mode 100644 index 00000000..fed9d15b --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-cid-2016.xsd @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd new file mode 100644 index 00000000..680cf154 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd @@ -0,0 +1,4 @@ + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-symex-2015.xsd b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-symex-2015.xsd new file mode 100644 index 00000000..89ada908 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/schemas/microsoft/wml-symex-2015.xsd @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/soffice.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/soffice.py new file mode 100644 index 00000000..ab242954 --- /dev/null +++ 
b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/soffice.py @@ -0,0 +1,184 @@ +""" +Helper for running LibreOffice (soffice) in environments where AF_UNIX +sockets may be blocked (e.g., sandboxed VMs). Detects the restriction +at runtime and applies an LD_PRELOAD shim if needed. + +Usage: + from office.soffice import run_soffice, get_soffice_env + + # Option 1 – run soffice directly + result = run_soffice(["--headless", "--convert-to", "pdf", "input.docx"]) + + # Option 2 – get env dict for your own subprocess calls + env = get_soffice_env() + subprocess.run(["soffice", ...], env=env) +""" + +import os +import socket +import subprocess +import tempfile +from pathlib import Path + + +def get_soffice_env() -> dict: + env = os.environ.copy() + env["SAL_USE_VCLPLUGIN"] = "svp" + + if _needs_shim(): + shim = _ensure_shim() + env["LD_PRELOAD"] = str(shim) + + return env + + +def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess: + env = get_soffice_env() + return subprocess.run(["soffice"] + args, env=env, **kwargs) + + + +_SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so" + + +def _needs_shim() -> bool: + """Check if AF_UNIX socket shim is needed (Linux/Unix only).""" + try: + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.close() + return False + except (OSError, AttributeError): + return True + + +def _ensure_shim() -> Path: + if _SHIM_SO.exists(): + return _SHIM_SO + + src = Path(tempfile.gettempdir()) / "lo_socket_shim.c" + src.write_text(_SHIM_SOURCE) + subprocess.run( + ["gcc", "-shared", "-fPIC", "-o", str(_SHIM_SO), str(src), "-ldl"], + check=True, + capture_output=True, + ) + src.unlink() + return _SHIM_SO + + + +_SHIM_SOURCE = r""" +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +static int (*real_socket)(int, int, int); +static int (*real_socketpair)(int, int, int, int[2]); +static int (*real_listen)(int, int); +static int (*real_accept)(int, struct 
sockaddr *, socklen_t *); +static int (*real_close)(int); +static int (*real_read)(int, void *, size_t); + +/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */ +static int is_shimmed[1024]; +static int peer_of[1024]; +static int wake_r[1024]; /* accept() blocks reading this */ +static int wake_w[1024]; /* close() writes to this */ +static int listener_fd = -1; /* FD that received listen() */ + +__attribute__((constructor)) +static void init(void) { + real_socket = dlsym(RTLD_NEXT, "socket"); + real_socketpair = dlsym(RTLD_NEXT, "socketpair"); + real_listen = dlsym(RTLD_NEXT, "listen"); + real_accept = dlsym(RTLD_NEXT, "accept"); + real_close = dlsym(RTLD_NEXT, "close"); + real_read = dlsym(RTLD_NEXT, "read"); + for (int i = 0; i < 1024; i++) { + peer_of[i] = -1; + wake_r[i] = -1; + wake_w[i] = -1; + } +} + +/* ---- socket ---------------------------------------------------------- */ +int socket(int domain, int type, int protocol) { + if (domain == AF_UNIX) { + int fd = real_socket(domain, type, protocol); + if (fd >= 0) return fd; + /* socket(AF_UNIX) blocked – fall back to socketpair(). */ + int sv[2]; + if (real_socketpair(domain, type, protocol, sv) == 0) { + if (sv[0] >= 0 && sv[0] < 1024) { + is_shimmed[sv[0]] = 1; + peer_of[sv[0]] = sv[1]; + int wp[2]; + if (pipe(wp) == 0) { + wake_r[sv[0]] = wp[0]; + wake_w[sv[0]] = wp[1]; + } + } + return sv[0]; + } + errno = EPERM; + return -1; + } + return real_socket(domain, type, protocol); +} + +/* ---- listen ---------------------------------------------------------- */ +int listen(int sockfd, int backlog) { + if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) { + listener_fd = sockfd; + return 0; + } + return real_listen(sockfd, backlog); +} + +/* ---- accept ---------------------------------------------------------- */ +int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) { + if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) { + /* Block until close() writes to the wake pipe. 
*/ + if (wake_r[sockfd] >= 0) { + char buf; + real_read(wake_r[sockfd], &buf, 1); + } + errno = ECONNABORTED; + return -1; + } + return real_accept(sockfd, addr, addrlen); +} + +/* ---- close ----------------------------------------------------------- */ +int close(int fd) { + if (fd >= 0 && fd < 1024 && is_shimmed[fd]) { + int was_listener = (fd == listener_fd); + is_shimmed[fd] = 0; + + if (wake_w[fd] >= 0) { /* unblock accept() */ + char c = 0; + write(wake_w[fd], &c, 1); + real_close(wake_w[fd]); + wake_w[fd] = -1; + } + if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd] = -1; } + if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; } + + if (was_listener) + _exit(0); /* conversion done – exit */ + } + return real_close(fd); +} +""" + + + +if __name__ == "__main__": + import sys + result = run_soffice(sys.argv[1:]) + sys.exit(result.returncode) diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/unpack.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/unpack.py new file mode 100644 index 00000000..00152533 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/unpack.py @@ -0,0 +1,132 @@ +"""Unpack Office files (DOCX, PPTX, XLSX) for editing. 
"""Unpack Office files (DOCX, PPTX, XLSX) for editing.

Extracts the ZIP archive, pretty-prints XML files, and optionally:
- Merges adjacent runs with identical formatting (DOCX only)
- Simplifies adjacent tracked changes from same author (DOCX only)

Usage:
    python unpack.py <input_file> <output_directory> [options]

Examples:
    python unpack.py document.docx unpacked/
    python unpack.py presentation.pptx unpacked/
    python unpack.py document.docx unpacked/ --merge-runs false
"""

import argparse
import sys
import zipfile
from pathlib import Path

import defusedxml.minidom

from helpers.merge_runs import merge_runs as do_merge_runs
from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines

# Map each smart-quote character to an XML numeric character reference.
# BUG FIX: the previous table mapped every character to itself (e.g.
# "\u201c" -> the same left-double-quote character), so
# _escape_smart_quotes was a no-op and never escaped anything.
SMART_QUOTE_REPLACEMENTS = {
    "\u201c": "&#8220;",  # left double quotation mark
    "\u201d": "&#8221;",  # right double quotation mark
    "\u2018": "&#8216;",  # left single quotation mark
    "\u2019": "&#8217;",  # right single quotation mark
}


def unpack(
    input_file: str,
    output_directory: str,
    merge_runs: bool = True,
    simplify_redlines: bool = True,
) -> tuple[None, str]:
    """Extract *input_file* into *output_directory* and normalize its XML.

    Returns a ``(None, message)`` tuple; the message starts with "Error"
    on failure. Expected failure modes never raise — they are reported
    through the message so the CLI can exit cleanly.
    """
    input_path = Path(input_file)
    output_path = Path(output_directory)
    suffix = input_path.suffix.lower()

    if not input_path.exists():
        return None, f"Error: {input_file} does not exist"

    if suffix not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"

    try:
        output_path.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(input_path, "r") as zf:
            zf.extractall(output_path)

        # .rels files are XML too and also benefit from pretty-printing.
        xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
        for xml_file in xml_files:
            _pretty_print_xml(xml_file)

        message = f"Unpacked {input_file} ({len(xml_files)} XML files)"

        if suffix == ".docx":
            if simplify_redlines:
                simplify_count, _ = do_simplify_redlines(str(output_path))
                message += f", simplified {simplify_count} tracked changes"

            if merge_runs:
                merge_count, _ = do_merge_runs(str(output_path))
                message += f", merged {merge_count} runs"

        # Escape smart quotes last so run-merging sees the raw characters.
        for xml_file in xml_files:
            _escape_smart_quotes(xml_file)

        return None, message

    except zipfile.BadZipFile:
        return None, f"Error: {input_file} is not a valid Office file"
    except Exception as e:
        return None, f"Error unpacking: {e}"


def _pretty_print_xml(xml_file: Path) -> None:
    """Re-indent *xml_file* in place; silently skip files that fail to parse."""
    try:
        content = xml_file.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(content)
        xml_file.write_bytes(dom.toprettyxml(indent="  ", encoding="utf-8"))
    except Exception:
        # Best-effort: leave unparseable files untouched.
        pass


def _escape_smart_quotes(xml_file: Path) -> None:
    """Replace smart-quote characters with XML numeric references in place."""
    try:
        content = xml_file.read_text(encoding="utf-8")
        for char, entity in SMART_QUOTE_REPLACEMENTS.items():
            content = content.replace(char, entity)
        xml_file.write_text(content, encoding="utf-8")
    except Exception:
        # Best-effort: skip files that cannot be read/written as UTF-8.
        pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    parser.add_argument("input_file", help="Office file to unpack")
    parser.add_argument("output_directory", help="Output directory")
    parser.add_argument(
        "--merge-runs",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    parser.add_argument(
        "--simplify-redlines",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    args = parser.parse_args()

    _, message = unpack(
        args.input_file,
        args.output_directory,
        merge_runs=args.merge_runs,
        simplify_redlines=args.simplify_redlines,
    )
    print(message)

    if "Error" in message:
        sys.exit(1)
tracked changes. + +Usage: + python validate.py [--original ] [--auto-repair] [--author NAME] + +The first argument can be either: +- An unpacked directory containing the Office document XML files +- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory + +Auto-repair fixes: +- paraId/durableId values that exceed OOXML limits +- Missing xml:space="preserve" on w:t elements with whitespace +""" + +import argparse +import sys +import tempfile +import zipfile +from pathlib import Path + +from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator + + +def main(): + parser = argparse.ArgumentParser(description="Validate Office document XML files") + parser.add_argument( + "path", + help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)", + ) + parser.add_argument( + "--original", + required=False, + default=None, + help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose output", + ) + parser.add_argument( + "--auto-repair", + action="store_true", + help="Automatically repair common issues (hex IDs, whitespace preservation)", + ) + parser.add_argument( + "--author", + default="Claude", + help="Author name for redlining validation (default: Claude)", + ) + args = parser.parse_args() + + path = Path(args.path) + assert path.exists(), f"Error: {path} does not exist" + + original_file = None + if args.original: + original_file = Path(args.original) + assert original_file.is_file(), f"Error: {original_file} is not a file" + assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], ( + f"Error: {original_file} must be a .docx, .pptx, or .xlsx file" + ) + + file_extension = (original_file or path).suffix.lower() + assert file_extension in [".docx", ".pptx", ".xlsx"], ( + f"Error: Cannot determine file type from {path}. 
Use --original or provide a .docx/.pptx/.xlsx file." + ) + + if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]: + temp_dir = tempfile.mkdtemp() + with zipfile.ZipFile(path, "r") as zf: + zf.extractall(temp_dir) + unpacked_dir = Path(temp_dir) + else: + assert path.is_dir(), f"Error: {path} is not a directory or Office file" + unpacked_dir = path + + match file_extension: + case ".docx": + validators = [ + DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose), + ] + if original_file: + validators.append( + RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author) + ) + case ".pptx": + validators = [ + PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose), + ] + case _: + print(f"Error: Validation not supported for file type {file_extension}") + sys.exit(1) + + if args.auto_repair: + total_repairs = sum(v.repair() for v in validators) + if total_repairs: + print(f"Auto-repaired {total_repairs} issue(s)") + + success = all(v.validate() for v in validators) + + if success: + print("All validations PASSED!") + + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/__init__.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/__init__.py new file mode 100644 index 00000000..db092ece --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/__init__.py @@ -0,0 +1,15 @@ +""" +Validation modules for Word document processing. 
+""" + +from .base import BaseSchemaValidator +from .docx import DOCXSchemaValidator +from .pptx import PPTXSchemaValidator +from .redlining import RedliningValidator + +__all__ = [ + "BaseSchemaValidator", + "DOCXSchemaValidator", + "PPTXSchemaValidator", + "RedliningValidator", +] diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/base.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/base.py new file mode 100644 index 00000000..db4a06a2 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/base.py @@ -0,0 +1,847 @@ +""" +Base validator with common validation logic for document files. +""" + +import re +from pathlib import Path + +import defusedxml.minidom +import lxml.etree + + +class BaseSchemaValidator: + + IGNORED_VALIDATION_ERRORS = [ + "hyphenationZone", + "purl.org/dc/terms", + ] + + UNIQUE_ID_REQUIREMENTS = { + "comment": ("id", "file"), + "commentrangestart": ("id", "file"), + "commentrangeend": ("id", "file"), + "bookmarkstart": ("id", "file"), + "bookmarkend": ("id", "file"), + "sldid": ("id", "file"), + "sldmasterid": ("id", "global"), + "sldlayoutid": ("id", "global"), + "cm": ("authorid", "file"), + "sheet": ("sheetid", "file"), + "definedname": ("id", "file"), + "cxnsp": ("id", "file"), + "sp": ("id", "file"), + "pic": ("id", "file"), + "grpsp": ("id", "file"), + } + + EXCLUDED_ID_CONTAINERS = { + "sectionlst", + } + + ELEMENT_RELATIONSHIP_TYPES = {} + + SCHEMA_MAPPINGS = { + "word": "ISO-IEC29500-4_2016/wml.xsd", + "ppt": "ISO-IEC29500-4_2016/pml.xsd", + "xl": "ISO-IEC29500-4_2016/sml.xsd", + "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd", + "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd", + "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd", + "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd", + ".rels": "ecma/fouth-edition/opc-relationships.xsd", + "people.xml": 
"microsoft/wml-2012.xsd", + "commentsIds.xml": "microsoft/wml-cid-2016.xsd", + "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd", + "commentsExtended.xml": "microsoft/wml-2012.xsd", + "chart": "ISO-IEC29500-4_2016/dml-chart.xsd", + "theme": "ISO-IEC29500-4_2016/dml-main.xsd", + "drawing": "ISO-IEC29500-4_2016/dml-main.xsd", + } + + MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006" + XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" + + PACKAGE_RELATIONSHIPS_NAMESPACE = ( + "http://schemas.openxmlformats.org/package/2006/relationships" + ) + OFFICE_RELATIONSHIPS_NAMESPACE = ( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships" + ) + CONTENT_TYPES_NAMESPACE = ( + "http://schemas.openxmlformats.org/package/2006/content-types" + ) + + MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"} + + OOXML_NAMESPACES = { + "http://schemas.openxmlformats.org/officeDocument/2006/math", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + "http://schemas.openxmlformats.org/schemaLibrary/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/chart", + "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing", + "http://schemas.openxmlformats.org/drawingml/2006/diagram", + "http://schemas.openxmlformats.org/drawingml/2006/picture", + "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing", + "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", + "http://schemas.openxmlformats.org/wordprocessingml/2006/main", + "http://schemas.openxmlformats.org/presentationml/2006/main", + "http://schemas.openxmlformats.org/spreadsheetml/2006/main", + "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes", + "http://www.w3.org/XML/1998/namespace", + } + + def __init__(self, unpacked_dir, original_file=None, verbose=False): + self.unpacked_dir = Path(unpacked_dir).resolve() + self.original_file = 
Path(original_file) if original_file else None + self.verbose = verbose + + self.schemas_dir = Path(__file__).parent.parent / "schemas" + + patterns = ["*.xml", "*.rels"] + self.xml_files = [ + f for pattern in patterns for f in self.unpacked_dir.rglob(pattern) + ] + + if not self.xml_files: + print(f"Warning: No XML files found in {self.unpacked_dir}") + + def validate(self): + raise NotImplementedError("Subclasses must implement the validate method") + + def repair(self) -> int: + return self.repair_whitespace_preservation() + + def repair_whitespace_preservation(self) -> int: + repairs = 0 + + for xml_file in self.xml_files: + try: + content = xml_file.read_text(encoding="utf-8") + dom = defusedxml.minidom.parseString(content) + modified = False + + for elem in dom.getElementsByTagName("*"): + if elem.tagName.endswith(":t") and elem.firstChild: + text = elem.firstChild.nodeValue + if text and (text.startswith((' ', '\t')) or text.endswith((' ', '\t'))): + if elem.getAttribute("xml:space") != "preserve": + elem.setAttribute("xml:space", "preserve") + text_preview = repr(text[:30]) + "..." 
if len(text) > 30 else repr(text) + print(f" Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}") + repairs += 1 + modified = True + + if modified: + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + + except Exception: + pass + + return repairs + + def validate_xml(self): + errors = [] + + for xml_file in self.xml_files: + try: + lxml.etree.parse(str(xml_file)) + except lxml.etree.XMLSyntaxError as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {e.lineno}: {e.msg}" + ) + except Exception as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Unexpected error: {str(e)}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} XML violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All XML files are well-formed") + return True + + def validate_namespaces(self): + errors = [] + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + declared = set(root.nsmap.keys()) - {None} + + for attr_val in [ + v for k, v in root.attrib.items() if k.endswith("Ignorable") + ]: + undeclared = set(attr_val.split()) - declared + errors.extend( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Namespace '{ns}' in Ignorable but not declared" + for ns in undeclared + ) + except lxml.etree.XMLSyntaxError: + continue + + if errors: + print(f"FAILED - {len(errors)} namespace issues:") + for error in errors: + print(error) + return False + if self.verbose: + print("PASSED - All namespace prefixes properly declared") + return True + + def validate_unique_ids(self): + errors = [] + global_ids = {} + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + file_ids = {} + + mc_elements = root.xpath( + ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE} + ) + for elem in mc_elements: + elem.getparent().remove(elem) + + for elem in root.iter(): + tag = ( + 
elem.tag.split("}")[-1].lower() + if "}" in elem.tag + else elem.tag.lower() + ) + + if tag in self.UNIQUE_ID_REQUIREMENTS: + in_excluded_container = any( + ancestor.tag.split("}")[-1].lower() in self.EXCLUDED_ID_CONTAINERS + for ancestor in elem.iterancestors() + ) + if in_excluded_container: + continue + + attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag] + + id_value = None + for attr, value in elem.attrib.items(): + attr_local = ( + attr.split("}")[-1].lower() + if "}" in attr + else attr.lower() + ) + if attr_local == attr_name: + id_value = value + break + + if id_value is not None: + if scope == "global": + if id_value in global_ids: + prev_file, prev_line, prev_tag = global_ids[ + id_value + ] + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> " + f"already used in {prev_file} at line {prev_line} in <{prev_tag}>" + ) + else: + global_ids[id_value] = ( + xml_file.relative_to(self.unpacked_dir), + elem.sourceline, + tag, + ) + elif scope == "file": + key = (tag, attr_name) + if key not in file_ids: + file_ids[key] = {} + + if id_value in file_ids[key]: + prev_line = file_ids[key][id_value] + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> " + f"(first occurrence at line {prev_line})" + ) + else: + file_ids[key][id_value] = elem.sourceline + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} ID uniqueness violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All required IDs are unique") + return True + + def validate_file_references(self): + errors = [] + + rels_files = list(self.unpacked_dir.rglob("*.rels")) + + if not rels_files: + if self.verbose: + print("PASSED - No .rels files found") + return 
True + + all_files = [] + for file_path in self.unpacked_dir.rglob("*"): + if ( + file_path.is_file() + and file_path.name != "[Content_Types].xml" + and not file_path.name.endswith(".rels") + ): + all_files.append(file_path.resolve()) + + all_referenced_files = set() + + if self.verbose: + print( + f"Found {len(rels_files)} .rels files and {len(all_files)} target files" + ) + + for rels_file in rels_files: + try: + rels_root = lxml.etree.parse(str(rels_file)).getroot() + + rels_dir = rels_file.parent + + referenced_files = set() + broken_refs = [] + + for rel in rels_root.findall( + ".//ns:Relationship", + namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE}, + ): + target = rel.get("Target") + if target and not target.startswith( + ("http", "mailto:") + ): + if target.startswith("/"): + target_path = self.unpacked_dir / target.lstrip("/") + elif rels_file.name == ".rels": + target_path = self.unpacked_dir / target + else: + base_dir = rels_dir.parent + target_path = base_dir / target + + try: + target_path = target_path.resolve() + if target_path.exists() and target_path.is_file(): + referenced_files.add(target_path) + all_referenced_files.add(target_path) + else: + broken_refs.append((target, rel.sourceline)) + except (OSError, ValueError): + broken_refs.append((target, rel.sourceline)) + + if broken_refs: + rel_path = rels_file.relative_to(self.unpacked_dir) + for broken_ref, line_num in broken_refs: + errors.append( + f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}" + ) + + except Exception as e: + rel_path = rels_file.relative_to(self.unpacked_dir) + errors.append(f" Error parsing {rel_path}: {e}") + + unreferenced_files = set(all_files) - all_referenced_files + + if unreferenced_files: + for unref_file in sorted(unreferenced_files): + unref_rel_path = unref_file.relative_to(self.unpacked_dir) + errors.append(f" Unreferenced file: {unref_rel_path}") + + if errors: + print(f"FAILED - Found {len(errors)} relationship validation errors:") + 
for error in errors: + print(error) + print( + "CRITICAL: These errors will cause the document to appear corrupt. " + + "Broken references MUST be fixed, " + + "and unreferenced files MUST be referenced or removed." + ) + return False + else: + if self.verbose: + print( + "PASSED - All references are valid and all files are properly referenced" + ) + return True + + def validate_all_relationship_ids(self): + import lxml.etree + + errors = [] + + for xml_file in self.xml_files: + if xml_file.suffix == ".rels": + continue + + rels_dir = xml_file.parent / "_rels" + rels_file = rels_dir / f"{xml_file.name}.rels" + + if not rels_file.exists(): + continue + + try: + rels_root = lxml.etree.parse(str(rels_file)).getroot() + rid_to_type = {} + + for rel in rels_root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rid = rel.get("Id") + rel_type = rel.get("Type", "") + if rid: + if rid in rid_to_type: + rels_rel_path = rels_file.relative_to(self.unpacked_dir) + errors.append( + f" {rels_rel_path}: Line {rel.sourceline}: " + f"Duplicate relationship ID '{rid}' (IDs must be unique)" + ) + type_name = ( + rel_type.split("/")[-1] if "/" in rel_type else rel_type + ) + rid_to_type[rid] = type_name + + xml_root = lxml.etree.parse(str(xml_file)).getroot() + + r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE + rid_attrs_to_check = ["id", "embed", "link"] + for elem in xml_root.iter(): + for attr_name in rid_attrs_to_check: + rid_attr = elem.get(f"{{{r_ns}}}{attr_name}") + if not rid_attr: + continue + xml_rel_path = xml_file.relative_to(self.unpacked_dir) + elem_name = ( + elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag + ) + + if rid_attr not in rid_to_type: + errors.append( + f" {xml_rel_path}: Line {elem.sourceline}: " + f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' " + f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' 
if len(rid_to_type) > 5 else ''})" + ) + elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES: + expected_type = self._get_expected_relationship_type( + elem_name + ) + if expected_type: + actual_type = rid_to_type[rid_attr] + if expected_type not in actual_type.lower(): + errors.append( + f" {xml_rel_path}: Line {elem.sourceline}: " + f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' " + f"but should point to a '{expected_type}' relationship" + ) + + except Exception as e: + xml_rel_path = xml_file.relative_to(self.unpacked_dir) + errors.append(f" Error processing {xml_rel_path}: {e}") + + if errors: + print(f"FAILED - Found {len(errors)} relationship ID reference errors:") + for error in errors: + print(error) + print("\nThese ID mismatches will cause the document to appear corrupt!") + return False + else: + if self.verbose: + print("PASSED - All relationship ID references are valid") + return True + + def _get_expected_relationship_type(self, element_name): + elem_lower = element_name.lower() + + if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES: + return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower] + + if elem_lower.endswith("id") and len(elem_lower) > 2: + prefix = elem_lower[:-2] + if prefix.endswith("master"): + return prefix.lower() + elif prefix.endswith("layout"): + return prefix.lower() + else: + if prefix == "sld": + return "slide" + return prefix.lower() + + if elem_lower.endswith("reference") and len(elem_lower) > 9: + prefix = elem_lower[:-9] + return prefix.lower() + + return None + + def validate_content_types(self): + errors = [] + + content_types_file = self.unpacked_dir / "[Content_Types].xml" + if not content_types_file.exists(): + print("FAILED - [Content_Types].xml file not found") + return False + + try: + root = lxml.etree.parse(str(content_types_file)).getroot() + declared_parts = set() + declared_extensions = set() + + for override in root.findall( + f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override" + ): + 
part_name = override.get("PartName") + if part_name is not None: + declared_parts.add(part_name.lstrip("/")) + + for default in root.findall( + f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default" + ): + extension = default.get("Extension") + if extension is not None: + declared_extensions.add(extension.lower()) + + declarable_roots = { + "sld", + "sldLayout", + "sldMaster", + "presentation", + "document", + "workbook", + "worksheet", + "theme", + } + + media_extensions = { + "png": "image/png", + "jpg": "image/jpeg", + "jpeg": "image/jpeg", + "gif": "image/gif", + "bmp": "image/bmp", + "tiff": "image/tiff", + "wmf": "image/x-wmf", + "emf": "image/x-emf", + } + + all_files = list(self.unpacked_dir.rglob("*")) + all_files = [f for f in all_files if f.is_file()] + + for xml_file in self.xml_files: + path_str = str(xml_file.relative_to(self.unpacked_dir)).replace( + "\\", "/" + ) + + if any( + skip in path_str + for skip in [".rels", "[Content_Types]", "docProps/", "_rels/"] + ): + continue + + try: + root_tag = lxml.etree.parse(str(xml_file)).getroot().tag + root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag + + if root_name in declarable_roots and path_str not in declared_parts: + errors.append( + f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml" + ) + + except Exception: + continue + + for file_path in all_files: + if file_path.suffix.lower() in {".xml", ".rels"}: + continue + if file_path.name == "[Content_Types].xml": + continue + if "_rels" in file_path.parts or "docProps" in file_path.parts: + continue + + extension = file_path.suffix.lstrip(".").lower() + if extension and extension not in declared_extensions: + if extension in media_extensions: + relative_path = file_path.relative_to(self.unpacked_dir) + errors.append( + f' {relative_path}: File with extension \'{extension}\' not declared in [Content_Types].xml - should add: ' + ) + + except Exception as e: + errors.append(f" Error parsing [Content_Types].xml: {e}") 
+ + if errors: + print(f"FAILED - Found {len(errors)} content type declaration errors:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print( + "PASSED - All content files are properly declared in [Content_Types].xml" + ) + return True + + def validate_file_against_xsd(self, xml_file, verbose=False): + xml_file = Path(xml_file).resolve() + unpacked_dir = self.unpacked_dir.resolve() + + is_valid, current_errors = self._validate_single_file_xsd( + xml_file, unpacked_dir + ) + + if is_valid is None: + return None, set() + elif is_valid: + return True, set() + + original_errors = self._get_original_file_errors(xml_file) + + assert current_errors is not None + new_errors = current_errors - original_errors + + new_errors = { + e for e in new_errors + if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS) + } + + if new_errors: + if verbose: + relative_path = xml_file.relative_to(unpacked_dir) + print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)") + for error in list(new_errors)[:3]: + truncated = error[:250] + "..." if len(error) > 250 else error + print(f" - {truncated}") + return False, new_errors + else: + if verbose: + print( + f"PASSED - No new errors (original had {len(current_errors)} errors)" + ) + return True, set() + + def validate_against_xsd(self): + new_errors = [] + original_error_count = 0 + valid_count = 0 + skipped_count = 0 + + for xml_file in self.xml_files: + relative_path = str(xml_file.relative_to(self.unpacked_dir)) + is_valid, new_file_errors = self.validate_file_against_xsd( + xml_file, verbose=False + ) + + if is_valid is None: + skipped_count += 1 + continue + elif is_valid and not new_file_errors: + valid_count += 1 + continue + elif is_valid: + original_error_count += 1 + valid_count += 1 + continue + + new_errors.append(f" {relative_path}: {len(new_file_errors)} new error(s)") + for error in list(new_file_errors)[:3]: + new_errors.append( + f" - {error[:250]}..." 
if len(error) > 250 else f" - {error}" + ) + + if self.verbose: + print(f"Validated {len(self.xml_files)} files:") + print(f" - Valid: {valid_count}") + print(f" - Skipped (no schema): {skipped_count}") + if original_error_count: + print(f" - With original errors (ignored): {original_error_count}") + print( + f" - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith(' ')]) or 0}" + ) + + if new_errors: + print("\nFAILED - Found NEW validation errors:") + for error in new_errors: + print(error) + return False + else: + if self.verbose: + print("\nPASSED - No new XSD validation errors introduced") + return True + + def _get_schema_path(self, xml_file): + if xml_file.name in self.SCHEMA_MAPPINGS: + return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name] + + if xml_file.suffix == ".rels": + return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"] + + if "charts/" in str(xml_file) and xml_file.name.startswith("chart"): + return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"] + + if "theme/" in str(xml_file) and xml_file.name.startswith("theme"): + return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"] + + if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS: + return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name] + + return None + + def _clean_ignorable_namespaces(self, xml_doc): + xml_string = lxml.etree.tostring(xml_doc, encoding="unicode") + xml_copy = lxml.etree.fromstring(xml_string) + + for elem in xml_copy.iter(): + attrs_to_remove = [] + + for attr in elem.attrib: + if "{" in attr: + ns = attr.split("}")[0][1:] + if ns not in self.OOXML_NAMESPACES: + attrs_to_remove.append(attr) + + for attr in attrs_to_remove: + del elem.attrib[attr] + + self._remove_ignorable_elements(xml_copy) + + return lxml.etree.ElementTree(xml_copy) + + def _remove_ignorable_elements(self, root): + elements_to_remove = [] + + for elem in list(root): + if not hasattr(elem, "tag") or callable(elem.tag): + continue + + tag_str = 
str(elem.tag) + if tag_str.startswith("{"): + ns = tag_str.split("}")[0][1:] + if ns not in self.OOXML_NAMESPACES: + elements_to_remove.append(elem) + continue + + self._remove_ignorable_elements(elem) + + for elem in elements_to_remove: + root.remove(elem) + + def _preprocess_for_mc_ignorable(self, xml_doc): + root = xml_doc.getroot() + + if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib: + del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"] + + return xml_doc + + def _validate_single_file_xsd(self, xml_file, base_path): + schema_path = self._get_schema_path(xml_file) + if not schema_path: + return None, None + + try: + with open(schema_path, "rb") as xsd_file: + parser = lxml.etree.XMLParser() + xsd_doc = lxml.etree.parse( + xsd_file, parser=parser, base_url=str(schema_path) + ) + schema = lxml.etree.XMLSchema(xsd_doc) + + with open(xml_file, "r") as f: + xml_doc = lxml.etree.parse(f) + + xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc) + xml_doc = self._preprocess_for_mc_ignorable(xml_doc) + + relative_path = xml_file.relative_to(base_path) + if ( + relative_path.parts + and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS + ): + xml_doc = self._clean_ignorable_namespaces(xml_doc) + + if schema.validate(xml_doc): + return True, set() + else: + errors = set() + for error in schema.error_log: + errors.add(error.message) + return False, errors + + except Exception as e: + return False, {str(e)} + + def _get_original_file_errors(self, xml_file): + if self.original_file is None: + return set() + + import tempfile + import zipfile + + xml_file = Path(xml_file).resolve() + unpacked_dir = self.unpacked_dir.resolve() + relative_path = xml_file.relative_to(unpacked_dir) + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + with zipfile.ZipFile(self.original_file, "r") as zip_ref: + zip_ref.extractall(temp_path) + + original_xml_file = temp_path / relative_path + + if not original_xml_file.exists(): + return set() + + 
is_valid, errors = self._validate_single_file_xsd( + original_xml_file, temp_path + ) + return errors if errors else set() + + def _remove_template_tags_from_text_nodes(self, xml_doc): + warnings = [] + template_pattern = re.compile(r"\{\{[^}]*\}\}") + + xml_string = lxml.etree.tostring(xml_doc, encoding="unicode") + xml_copy = lxml.etree.fromstring(xml_string) + + def process_text_content(text, content_type): + if not text: + return text + matches = list(template_pattern.finditer(text)) + if matches: + for match in matches: + warnings.append( + f"Found template tag in {content_type}: {match.group()}" + ) + return template_pattern.sub("", text) + return text + + for elem in xml_copy.iter(): + if not hasattr(elem, "tag") or callable(elem.tag): + continue + tag_str = str(elem.tag) + if tag_str.endswith("}t") or tag_str == "t": + continue + + elem.text = process_text_content(elem.text, "text content") + elem.tail = process_text_content(elem.tail, "tail content") + + return lxml.etree.ElementTree(xml_copy), warnings + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/docx.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/docx.py new file mode 100644 index 00000000..fec405e6 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/docx.py @@ -0,0 +1,446 @@ +""" +Validator for Word document XML files against XSD schemas. 
+""" + +import random +import re +import tempfile +import zipfile + +import defusedxml.minidom +import lxml.etree + +from .base import BaseSchemaValidator + + +class DOCXSchemaValidator(BaseSchemaValidator): + + WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml" + W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid" + + ELEMENT_RELATIONSHIP_TYPES = {} + + def validate(self): + if not self.validate_xml(): + return False + + all_valid = True + if not self.validate_namespaces(): + all_valid = False + + if not self.validate_unique_ids(): + all_valid = False + + if not self.validate_file_references(): + all_valid = False + + if not self.validate_content_types(): + all_valid = False + + if not self.validate_against_xsd(): + all_valid = False + + if not self.validate_whitespace_preservation(): + all_valid = False + + if not self.validate_deletions(): + all_valid = False + + if not self.validate_insertions(): + all_valid = False + + if not self.validate_all_relationship_ids(): + all_valid = False + + if not self.validate_id_constraints(): + all_valid = False + + if not self.validate_comment_markers(): + all_valid = False + + self.compare_paragraph_counts() + + return all_valid + + def validate_whitespace_preservation(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + + for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"): + if elem.text: + text = elem.text + if re.search(r"^[ \t\n\r]", text) or re.search( + r"[ \t\n\r]$", text + ): + xml_space_attr = f"{{{self.XML_NAMESPACE}}}space" + if ( + xml_space_attr not in elem.attrib + or elem.attrib[xml_space_attr] != "preserve" + ): + text_preview = ( + repr(text)[:50] + "..." 
+ if len(repr(text)) > 50 + else repr(text) + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} whitespace preservation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All whitespace is properly preserved") + return True + + def validate_deletions(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces): + if t_elem.text: + text_preview = ( + repr(t_elem.text)[:50] + "..." + if len(repr(t_elem.text)) > 50 + else repr(t_elem.text) + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {t_elem.sourceline}: found within : {text_preview}" + ) + + for instr_elem in root.xpath( + ".//w:del//w:instrText", namespaces=namespaces + ): + text_preview = ( + repr(instr_elem.text or "")[:50] + "..." 
+ if len(repr(instr_elem.text or "")) > 50 + else repr(instr_elem.text or "") + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {instr_elem.sourceline}: found within (use ): {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} deletion validation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - No w:t elements found within w:del elements") + return True + + def count_paragraphs_in_unpacked(self): + count = 0 + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p") + count = len(paragraphs) + except Exception as e: + print(f"Error counting paragraphs in unpacked document: {e}") + + return count + + def count_paragraphs_in_original(self): + original = self.original_file + if original is None: + return 0 + + count = 0 + + try: + with tempfile.TemporaryDirectory() as temp_dir: + with zipfile.ZipFile(original, "r") as zip_ref: + zip_ref.extractall(temp_dir) + + doc_xml_path = temp_dir + "/word/document.xml" + root = lxml.etree.parse(doc_xml_path).getroot() + + paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p") + count = len(paragraphs) + + except Exception as e: + print(f"Error counting paragraphs in original document: {e}") + + return count + + def validate_insertions(self): + errors = [] + + for xml_file in self.xml_files: + if xml_file.name != "document.xml": + continue + + try: + root = lxml.etree.parse(str(xml_file)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + invalid_elements = root.xpath( + ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces + ) + + for elem in invalid_elements: + text_preview = ( + repr(elem.text or 
"")[:50] + "..." + if len(repr(elem.text or "")) > 50 + else repr(elem.text or "") + ) + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: within : {text_preview}" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} insertion validation violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - No w:delText elements within w:ins elements") + return True + + def compare_paragraph_counts(self): + original_count = self.count_paragraphs_in_original() + new_count = self.count_paragraphs_in_unpacked() + + diff = new_count - original_count + diff_str = f"+{diff}" if diff > 0 else str(diff) + print(f"\nParagraphs: {original_count} → {new_count} ({diff_str})") + + def _parse_id_value(self, val: str, base: int = 16) -> int: + return int(val, base) + + def validate_id_constraints(self): + errors = [] + para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId" + durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId" + + for xml_file in self.xml_files: + try: + for elem in lxml.etree.parse(str(xml_file)).iter(): + if val := elem.get(para_id_attr): + if self._parse_id_value(val, base=16) >= 0x80000000: + errors.append( + f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000" + ) + + if val := elem.get(durable_id_attr): + if xml_file.name == "numbering.xml": + try: + if self._parse_id_value(val, base=10) >= 0x7FFFFFFF: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} >= 0x7FFFFFFF" + ) + except ValueError: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} must be decimal in numbering.xml" + ) + else: + if self._parse_id_value(val, base=16) >= 0x7FFFFFFF: + errors.append( + f" {xml_file.name}:{elem.sourceline}: " + f"durableId={val} >= 0x7FFFFFFF" + ) + except Exception: + pass + + 
if errors: + print(f"FAILED - {len(errors)} ID constraint violations:") + for e in errors: + print(e) + elif self.verbose: + print("PASSED - All paraId/durableId values within constraints") + return not errors + + def validate_comment_markers(self): + errors = [] + + document_xml = None + comments_xml = None + for xml_file in self.xml_files: + if xml_file.name == "document.xml" and "word" in str(xml_file): + document_xml = xml_file + elif xml_file.name == "comments.xml": + comments_xml = xml_file + + if not document_xml: + if self.verbose: + print("PASSED - No document.xml found (skipping comment validation)") + return True + + try: + doc_root = lxml.etree.parse(str(document_xml)).getroot() + namespaces = {"w": self.WORD_2006_NAMESPACE} + + range_starts = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentRangeStart", namespaces=namespaces + ) + } + range_ends = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentRangeEnd", namespaces=namespaces + ) + } + references = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in doc_root.xpath( + ".//w:commentReference", namespaces=namespaces + ) + } + + orphaned_ends = range_ends - range_starts + for comment_id in sorted( + orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + errors.append( + f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart' + ) + + orphaned_starts = range_starts - range_ends + for comment_id in sorted( + orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + errors.append( + f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd' + ) + + comment_ids = set() + if comments_xml and comments_xml.exists(): + comments_root = lxml.etree.parse(str(comments_xml)).getroot() + comment_ids = { + elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id") + for elem in comments_root.xpath( + ".//w:comment", namespaces=namespaces + ) + 
} + + marker_ids = range_starts | range_ends | references + invalid_refs = marker_ids - comment_ids + for comment_id in sorted( + invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0 + ): + if comment_id: + errors.append( + f' document.xml: marker id="{comment_id}" references non-existent comment' + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append(f" Error parsing XML: {e}") + + if errors: + print(f"FAILED - {len(errors)} comment marker violations:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All comment markers properly paired") + return True + + def repair(self) -> int: + repairs = super().repair() + repairs += self.repair_durableId() + return repairs + + def repair_durableId(self) -> int: + repairs = 0 + + for xml_file in self.xml_files: + try: + content = xml_file.read_text(encoding="utf-8") + dom = defusedxml.minidom.parseString(content) + modified = False + + for elem in dom.getElementsByTagName("*"): + if not elem.hasAttribute("w16cid:durableId"): + continue + + durable_id = elem.getAttribute("w16cid:durableId") + needs_repair = False + + if xml_file.name == "numbering.xml": + try: + needs_repair = ( + self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF + ) + except ValueError: + needs_repair = True + else: + try: + needs_repair = ( + self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF + ) + except ValueError: + needs_repair = True + + if needs_repair: + value = random.randint(1, 0x7FFFFFFE) + if xml_file.name == "numbering.xml": + new_id = str(value) + else: + new_id = f"{value:08X}" + + elem.setAttribute("w16cid:durableId", new_id) + print( + f" Repaired: {xml_file.name}: durableId {durable_id} → {new_id}" + ) + repairs += 1 + modified = True + + if modified: + xml_file.write_bytes(dom.toxml(encoding="UTF-8")) + + except Exception: + pass + + return repairs + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff 
--git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/pptx.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/pptx.py new file mode 100644 index 00000000..09842aa9 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/pptx.py @@ -0,0 +1,275 @@ +""" +Validator for PowerPoint presentation XML files against XSD schemas. +""" + +import re + +from .base import BaseSchemaValidator + + +class PPTXSchemaValidator(BaseSchemaValidator): + + PRESENTATIONML_NAMESPACE = ( + "http://schemas.openxmlformats.org/presentationml/2006/main" + ) + + ELEMENT_RELATIONSHIP_TYPES = { + "sldid": "slide", + "sldmasterid": "slidemaster", + "notesmasterid": "notesmaster", + "sldlayoutid": "slidelayout", + "themeid": "theme", + "tablestyleid": "tablestyles", + } + + def validate(self): + if not self.validate_xml(): + return False + + all_valid = True + if not self.validate_namespaces(): + all_valid = False + + if not self.validate_unique_ids(): + all_valid = False + + if not self.validate_uuid_ids(): + all_valid = False + + if not self.validate_file_references(): + all_valid = False + + if not self.validate_slide_layout_ids(): + all_valid = False + + if not self.validate_content_types(): + all_valid = False + + if not self.validate_against_xsd(): + all_valid = False + + if not self.validate_notes_slide_references(): + all_valid = False + + if not self.validate_all_relationship_ids(): + all_valid = False + + if not self.validate_no_duplicate_slide_layouts(): + all_valid = False + + return all_valid + + def validate_uuid_ids(self): + import lxml.etree + + errors = [] + uuid_pattern = re.compile( + r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$" + ) + + for xml_file in self.xml_files: + try: + root = lxml.etree.parse(str(xml_file)).getroot() + + for elem in root.iter(): + for attr, value in elem.attrib.items(): + attr_name = 
attr.split("}")[-1].lower() + if attr_name == "id" or attr_name.endswith("id"): + if self._looks_like_uuid(value): + if not uuid_pattern.match(value): + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: " + f"Line {elem.sourceline}: ID '{value}' appears to be a UUID but contains invalid hex characters" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} UUID ID validation errors:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All UUID-like IDs contain valid hex values") + return True + + def _looks_like_uuid(self, value): + clean_value = value.strip("{}()").replace("-", "") + return len(clean_value) == 32 and all(c.isalnum() for c in clean_value) + + def validate_slide_layout_ids(self): + import lxml.etree + + errors = [] + + slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml")) + + if not slide_masters: + if self.verbose: + print("PASSED - No slide masters found") + return True + + for slide_master in slide_masters: + try: + root = lxml.etree.parse(str(slide_master)).getroot() + + rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels" + + if not rels_file.exists(): + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: " + f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}" + ) + continue + + rels_root = lxml.etree.parse(str(rels_file)).getroot() + + valid_layout_rids = set() + for rel in rels_root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rel_type = rel.get("Type", "") + if "slideLayout" in rel_type: + valid_layout_rids.add(rel.get("Id")) + + for sld_layout_id in root.findall( + f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId" + ): + r_id = sld_layout_id.get( + f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id" + ) + layout_id = sld_layout_id.get("id") 
+ + if r_id and r_id not in valid_layout_rids: + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: " + f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' " + f"references r:id='{r_id}' which is not found in slide layout relationships" + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {slide_master.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print(f"FAILED - Found {len(errors)} slide layout ID validation errors:") + for error in errors: + print(error) + print( + "Remove invalid references or add missing slide layouts to the relationships file." + ) + return False + else: + if self.verbose: + print("PASSED - All slide layout IDs reference valid slide layouts") + return True + + def validate_no_duplicate_slide_layouts(self): + import lxml.etree + + errors = [] + slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels")) + + for rels_file in slide_rels_files: + try: + root = lxml.etree.parse(str(rels_file)).getroot() + + layout_rels = [ + rel + for rel in root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ) + if "slideLayout" in rel.get("Type", "") + ] + + if len(layout_rels) > 1: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references" + ) + + except Exception as e: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + if errors: + print("FAILED - Found slides with duplicate slideLayout references:") + for error in errors: + print(error) + return False + else: + if self.verbose: + print("PASSED - All slides have exactly one slideLayout reference") + return True + + def validate_notes_slide_references(self): + import lxml.etree + + errors = [] + notes_slide_references = {} + + slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels")) + + if not slide_rels_files: + if self.verbose: + print("PASSED - No slide relationship files 
found") + return True + + for rels_file in slide_rels_files: + try: + root = lxml.etree.parse(str(rels_file)).getroot() + + for rel in root.findall( + f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship" + ): + rel_type = rel.get("Type", "") + if "notesSlide" in rel_type: + target = rel.get("Target", "") + if target: + normalized_target = target.replace("../", "") + + slide_name = rels_file.stem.replace( + ".xml", "" + ) + + if normalized_target not in notes_slide_references: + notes_slide_references[normalized_target] = [] + notes_slide_references[normalized_target].append( + (slide_name, rels_file) + ) + + except (lxml.etree.XMLSyntaxError, Exception) as e: + errors.append( + f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}" + ) + + for target, references in notes_slide_references.items(): + if len(references) > 1: + slide_names = [ref[0] for ref in references] + errors.append( + f" Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}" + ) + for slide_name, rels_file in references: + errors.append(f" - {rels_file.relative_to(self.unpacked_dir)}") + + if errors: + print( + f"FAILED - Found {len([e for e in errors if not e.startswith(' ')])} notes slide reference validation errors:" + ) + for error in errors: + print(error) + print("Each slide may optionally have its own slide file.") + return False + else: + if self.verbose: + print("PASSED - All notes slide references are unique") + return True + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/redlining.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/redlining.py new file mode 100644 index 00000000..71c81b6b --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/office/validators/redlining.py @@ -0,0 +1,247 @@ +""" +Validator for tracked changes in Word documents. 
+""" + +import subprocess +import tempfile +import zipfile +from pathlib import Path + + +class RedliningValidator: + + def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"): + self.unpacked_dir = Path(unpacked_dir) + self.original_docx = Path(original_docx) + self.verbose = verbose + self.author = author + self.namespaces = { + "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + } + + def repair(self) -> int: + return 0 + + def validate(self): + modified_file = self.unpacked_dir / "word" / "document.xml" + if not modified_file.exists(): + print(f"FAILED - Modified document.xml not found at {modified_file}") + return False + + try: + import xml.etree.ElementTree as ET + + tree = ET.parse(modified_file) + root = tree.getroot() + + del_elements = root.findall(".//w:del", self.namespaces) + ins_elements = root.findall(".//w:ins", self.namespaces) + + author_del_elements = [ + elem + for elem in del_elements + if elem.get(f"{{{self.namespaces['w']}}}author") == self.author + ] + author_ins_elements = [ + elem + for elem in ins_elements + if elem.get(f"{{{self.namespaces['w']}}}author") == self.author + ] + + if not author_del_elements and not author_ins_elements: + if self.verbose: + print(f"PASSED - No tracked changes by {self.author} found.") + return True + + except Exception: + pass + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + try: + with zipfile.ZipFile(self.original_docx, "r") as zip_ref: + zip_ref.extractall(temp_path) + except Exception as e: + print(f"FAILED - Error unpacking original docx: {e}") + return False + + original_file = temp_path / "word" / "document.xml" + if not original_file.exists(): + print( + f"FAILED - Original document.xml not found in {self.original_docx}" + ) + return False + + try: + import xml.etree.ElementTree as ET + + modified_tree = ET.parse(modified_file) + modified_root = modified_tree.getroot() + original_tree = ET.parse(original_file) + 
original_root = original_tree.getroot() + except ET.ParseError as e: + print(f"FAILED - Error parsing XML files: {e}") + return False + + self._remove_author_tracked_changes(original_root) + self._remove_author_tracked_changes(modified_root) + + modified_text = self._extract_text_content(modified_root) + original_text = self._extract_text_content(original_root) + + if modified_text != original_text: + error_message = self._generate_detailed_diff( + original_text, modified_text + ) + print(error_message) + return False + + if self.verbose: + print(f"PASSED - All changes by {self.author} are properly tracked") + return True + + def _generate_detailed_diff(self, original_text, modified_text): + error_parts = [ + f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes", + "", + "Likely causes:", + " 1. Modified text inside another author's or tags", + " 2. Made edits without proper tracked changes", + " 3. Didn't nest inside when deleting another's insertion", + "", + "For pre-redlined documents, use correct patterns:", + " - To reject another's INSERTION: Nest inside their ", + " - To restore another's DELETION: Add new AFTER their ", + "", + ] + + git_diff = self._get_git_word_diff(original_text, modified_text) + if git_diff: + error_parts.extend(["Differences:", "============", git_diff]) + else: + error_parts.append("Unable to generate word diff (git not available)") + + return "\n".join(error_parts) + + def _get_git_word_diff(self, original_text, modified_text): + try: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + original_file = temp_path / "original.txt" + modified_file = temp_path / "modified.txt" + + original_file.write_text(original_text, encoding="utf-8") + modified_file.write_text(modified_text, encoding="utf-8") + + result = subprocess.run( + [ + "git", + "diff", + "--word-diff=plain", + "--word-diff-regex=.", + "-U0", + "--no-index", + str(original_file), + str(modified_file), + ], + 
    def _remove_author_tracked_changes(self, root):
        """Strip this author's tracked changes from *root*, in place.

        Pass 1 deletes the author's <w:ins> elements (and their content);
        pass 2 unwraps the author's <w:del> elements, renaming <w:delText>
        children back to <w:t> and splicing them into the parent, so the
        tree reflects the text without the author's edits.
        """
        ins_tag = f"{{{self.namespaces['w']}}}ins"
        del_tag = f"{{{self.namespaces['w']}}}del"
        author_attr = f"{{{self.namespaces['w']}}}author"

        # Pass 1: collect-then-remove so we never mutate a child list while
        # iterating over it.
        for parent in root.iter():
            to_remove = []
            for child in parent:
                if child.tag == ins_tag and child.get(author_attr) == self.author:
                    to_remove.append(child)
            for elem in to_remove:
                parent.remove(elem)

        deltext_tag = f"{{{self.namespaces['w']}}}delText"
        t_tag = f"{{{self.namespaces['w']}}}t"

        # Pass 2: unwrap the author's <w:del> elements at their original
        # position in the parent.
        for parent in root.iter():
            to_process = []
            for child in parent:
                if child.tag == del_tag and child.get(author_attr) == self.author:
                    # NOTE(review): list.index uses ==; ElementTree elements
                    # don't define __eq__, so this matches by identity and
                    # finds this exact child — confirm if elements ever gain
                    # a custom __eq__.
                    to_process.append((child, list(parent).index(child)))

            # Process right-to-left so earlier recorded indices stay valid
            # as elements are inserted/removed.
            for del_elem, del_index in reversed(to_process):
                # Rename deleted-text runs back to ordinary text runs.
                for elem in del_elem.iter():
                    if elem.tag == deltext_tag:
                        elem.tag = t_tag

                # Insert children in reverse at a fixed index so they end up
                # in their original order.
                for child in reversed(list(del_elem)):
                    parent.insert(del_index, child)
                parent.remove(del_elem)
for p_elem in root.findall(f".//{p_tag}"): + text_parts = [] + for t_elem in p_elem.findall(f".//{t_tag}"): + if t_elem.text: + text_parts.append(t_elem.text) + paragraph_text = "".join(text_parts) + if paragraph_text: + paragraphs.append(paragraph_text) + + return "\n".join(paragraphs) + + +if __name__ == "__main__": + raise RuntimeError("This module should not be run directly.") diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/ppt_to_pic.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/ppt_to_pic.py new file mode 100644 index 00000000..2afefb4d --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/ppt_to_pic.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +""" +PPT to Image Converter for PPTX Plus Skill (Linux) +Converts PPTX slides to images for visual inspection. + +Usage: + python ppt_to_pic.py --ppt-dir ./ppt --output-dir ./images + python ppt_to_pic.py --file presentation.pptx --output slide.png + +Requirements: + - LibreOffice with soffice available + - Poppler (pdftoppm) for PDF to image conversion +""" + +import argparse +import os +import sys +import time +from pathlib import Path + + +def ppt_to_pic_libreoffice(ppt_path: str, output_dir: str) -> bool: + """Convert PPTX to images using LibreOffice (cross-platform)""" + try: + import subprocess + + abs_ppt_path = os.path.abspath(ppt_path) + abs_output_dir = os.path.abspath(output_dir) + os.makedirs(abs_output_dir, exist_ok=True) + + # First convert to PDF + result = subprocess.run([ + "soffice", "--headless", "--convert-to", "pdf", + "--outdir", abs_output_dir, abs_ppt_path + ], capture_output=True, text=True, timeout=60) + + if result.returncode != 0: + print(f"LibreOffice PDF conversion failed: {result.stderr}") + return False + + # Get PDF path + pdf_name = Path(ppt_path).stem + ".pdf" + pdf_path = os.path.join(abs_output_dir, pdf_name) + + if not os.path.exists(pdf_path): + print(f"PDF not found: {pdf_path}") + return False + + # Convert PDF to 
def convert_pptx_to_images(ppt_path: str, output_dir: str) -> str:
    """Convert a single PPTX file to images.

    Args:
        ppt_path: Path to the .pptx file.
        output_dir: Directory where slide images are written (created if
            missing).

    Returns:
        A human-readable status string (the CLI prints it verbatim).
    """
    abs_ppt_path = os.path.abspath(ppt_path)
    abs_output_dir = os.path.abspath(output_dir)

    if not os.path.exists(abs_ppt_path):
        return f"Error: PPTX file not found: {abs_ppt_path}"

    os.makedirs(abs_output_dir, exist_ok=True)

    # Fix: dropped an unused `base_name` local that was computed but never
    # read. LibreOffice + pdftoppm do the actual conversion (Linux path).
    if ppt_to_pic_libreoffice(abs_ppt_path, abs_output_dir):
        return f"Successfully converted {ppt_path} to images in {abs_output_dir}"

    return f"Failed to convert {ppt_path}. Please ensure LibreOffice and Poppler (pdftoppm) are installed."
def convert_directory(ppt_dir: str, output_dir: str) -> str:
    """Convert every PPTX file in *ppt_dir* to images.

    Each presentation gets its own subdirectory of *output_dir* named after
    the file's stem. Lock files (``~$...``) are skipped.

    Returns:
        A human-readable summary string.
    """
    # NOTE(review): stripping leading slashes forces both paths to resolve
    # relative to the CWD even when given as absolute — presumably a
    # sandboxing measure; confirm intent before changing.
    abs_ppt_dir = os.path.abspath(ppt_dir.lstrip("/").lstrip("\\"))
    abs_output_dir = os.path.abspath(output_dir.lstrip("/").lstrip("\\"))

    if not os.path.exists(abs_ppt_dir):
        return f"Error: Directory not found: {abs_ppt_dir}"

    os.makedirs(abs_output_dir, exist_ok=True)

    ppt_files = [
        f
        for f in os.listdir(abs_ppt_dir)
        if f.endswith(".pptx") and not f.startswith("~$")
    ]

    if not ppt_files:
        return f"No PPTX files found in {abs_ppt_dir}"

    print(f"Converting {len(ppt_files)} PPTX files...")

    success_count = 0
    for filename in ppt_files:
        ppt_path = os.path.join(abs_ppt_dir, filename)
        # Bug fix: the progress line previously printed a literal placeholder
        # instead of the file currently being processed.
        print(f" Processing: {filename}...", end="", flush=True)

        base_name = Path(filename).stem
        file_output_dir = os.path.join(abs_output_dir, base_name)

        if ppt_to_pic_libreoffice(ppt_path, file_output_dir):
            print(" DONE")
            success_count += 1
        else:
            print(" FAILED")

    return f"Successfully converted {success_count}/{len(ppt_files)} files to {abs_output_dir}"
file mode 100644 index 00000000..edcbdc0f --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/thumbnail.py @@ -0,0 +1,289 @@ +"""Create thumbnail grids from PowerPoint presentation slides. + +Creates a grid layout of slide thumbnails for quick visual analysis. +Labels each thumbnail with its XML filename (e.g., slide1.xml). +Hidden slides are shown with a placeholder pattern. + +Usage: + python thumbnail.py input.pptx [output_prefix] [--cols N] + +Examples: + python thumbnail.py presentation.pptx + # Creates: thumbnails.jpg + + python thumbnail.py template.pptx grid --cols 4 + # Creates: grid.jpg (or grid-1.jpg, grid-2.jpg for large decks) +""" + +import argparse +import subprocess +import sys +import tempfile +import zipfile +from pathlib import Path + +import defusedxml.minidom +from office.soffice import get_soffice_env +from PIL import Image, ImageDraw, ImageFont + +THUMBNAIL_WIDTH = 300 +CONVERSION_DPI = 100 +MAX_COLS = 6 +DEFAULT_COLS = 3 +JPEG_QUALITY = 95 +GRID_PADDING = 20 +BORDER_WIDTH = 2 +FONT_SIZE_RATIO = 0.10 +LABEL_PADDING_RATIO = 0.4 + + +def main(): + parser = argparse.ArgumentParser( + description="Create thumbnail grids from PowerPoint slides." 
+ ) + parser.add_argument("input", help="Input PowerPoint file (.pptx)") + parser.add_argument( + "output_prefix", + nargs="?", + default="thumbnails", + help="Output prefix for image files (default: thumbnails)", + ) + parser.add_argument( + "--cols", + type=int, + default=DEFAULT_COLS, + help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})", + ) + + args = parser.parse_args() + + cols = min(args.cols, MAX_COLS) + if args.cols > MAX_COLS: + print(f"Warning: Columns limited to {MAX_COLS}") + + input_path = Path(args.input) + if not input_path.exists() or input_path.suffix.lower() != ".pptx": + print(f"Error: Invalid PowerPoint file: {args.input}", file=sys.stderr) + sys.exit(1) + + output_path = Path(f"{args.output_prefix}.jpg") + + try: + slide_info = get_slide_info(input_path) + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + visible_images = convert_to_images(input_path, temp_path) + + if not visible_images and not any(s["hidden"] for s in slide_info): + print("Error: No slides found", file=sys.stderr) + sys.exit(1) + + slides = build_slide_list(slide_info, visible_images, temp_path) + + grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path) + + print(f"Created {len(grid_files)} grid(s):") + for grid_file in grid_files: + print(f" {grid_file}") + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +def get_slide_info(pptx_path: Path) -> list[dict]: + with zipfile.ZipFile(pptx_path, "r") as zf: + rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8") + rels_dom = defusedxml.minidom.parseString(rels_content) + + rid_to_slide = {} + for rel in rels_dom.getElementsByTagName("Relationship"): + rid = rel.getAttribute("Id") + target = rel.getAttribute("Target") + rel_type = rel.getAttribute("Type") + if "slide" in rel_type and target.startswith("slides/"): + rid_to_slide[rid] = target.replace("slides/", "") + + pres_content = 
zf.read("ppt/presentation.xml").decode("utf-8") + pres_dom = defusedxml.minidom.parseString(pres_content) + + slides = [] + for sld_id in pres_dom.getElementsByTagName("p:sldId"): + rid = sld_id.getAttribute("r:id") + if rid in rid_to_slide: + hidden = sld_id.getAttribute("show") == "0" + slides.append({"name": rid_to_slide[rid], "hidden": hidden}) + + return slides + + +def build_slide_list( + slide_info: list[dict], + visible_images: list[Path], + temp_dir: Path, +) -> list[tuple[Path, str]]: + if visible_images: + with Image.open(visible_images[0]) as img: + placeholder_size = img.size + else: + placeholder_size = (1920, 1080) + + slides = [] + visible_idx = 0 + + for info in slide_info: + if info["hidden"]: + placeholder_path = temp_dir / f"hidden-{info['name']}.jpg" + placeholder_img = create_hidden_placeholder(placeholder_size) + placeholder_img.save(placeholder_path, "JPEG") + slides.append((placeholder_path, f"{info['name']} (hidden)")) + else: + if visible_idx < len(visible_images): + slides.append((visible_images[visible_idx], info["name"])) + visible_idx += 1 + + return slides + + +def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image: + img = Image.new("RGB", size, color="#F0F0F0") + draw = ImageDraw.Draw(img) + line_width = max(5, min(size) // 100) + draw.line([(0, 0), size], fill="#CCCCCC", width=line_width) + draw.line([(size[0], 0), (0, size[1])], fill="#CCCCCC", width=line_width) + return img + + +def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]: + pdf_path = temp_dir / f"{pptx_path.stem}.pdf" + + result = subprocess.run( + [ + "soffice", + "--headless", + "--convert-to", + "pdf", + "--outdir", + str(temp_dir), + str(pptx_path), + ], + capture_output=True, + text=True, + env=get_soffice_env(), + ) + if result.returncode != 0 or not pdf_path.exists(): + raise RuntimeError("PDF conversion failed") + + result = subprocess.run( + [ + "pdftoppm", + "-jpeg", + "-r", + str(CONVERSION_DPI), + str(pdf_path), + str(temp_dir 
/ "slide"), + ], + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError("Image conversion failed") + + return sorted(temp_dir.glob("slide-*.jpg")) + + +def create_grids( + slides: list[tuple[Path, str]], + cols: int, + width: int, + output_path: Path, +) -> list[str]: + max_per_grid = cols * (cols + 1) + grid_files = [] + + for chunk_idx, start_idx in enumerate(range(0, len(slides), max_per_grid)): + end_idx = min(start_idx + max_per_grid, len(slides)) + chunk_slides = slides[start_idx:end_idx] + + grid = create_grid(chunk_slides, cols, width) + + if len(slides) <= max_per_grid: + grid_filename = output_path + else: + stem = output_path.stem + suffix = output_path.suffix + grid_filename = output_path.parent / f"{stem}-{chunk_idx + 1}{suffix}" + + grid_filename.parent.mkdir(parents=True, exist_ok=True) + grid.save(str(grid_filename), quality=JPEG_QUALITY) + grid_files.append(str(grid_filename)) + + return grid_files + + +def create_grid( + slides: list[tuple[Path, str]], + cols: int, + width: int, +) -> Image.Image: + font_size = int(width * FONT_SIZE_RATIO) + label_padding = int(font_size * LABEL_PADDING_RATIO) + + with Image.open(slides[0][0]) as img: + aspect = img.height / img.width + height = int(width * aspect) + + rows = (len(slides) + cols - 1) // cols + grid_w = cols * width + (cols + 1) * GRID_PADDING + grid_h = rows * (height + font_size + label_padding * 2) + (rows + 1) * GRID_PADDING + + grid = Image.new("RGB", (grid_w, grid_h), "white") + draw = ImageDraw.Draw(grid) + + try: + font = ImageFont.load_default(size=font_size) + except Exception: + font = ImageFont.load_default() + + for i, (img_path, slide_name) in enumerate(slides): + row, col = i // cols, i % cols + x = col * width + (col + 1) * GRID_PADDING + y_base = ( + row * (height + font_size + label_padding * 2) + (row + 1) * GRID_PADDING + ) + + label = slide_name + bbox = draw.textbbox((0, 0), label, font=font) + text_w = bbox[2] - bbox[0] + draw.text( + (x + 
(width - text_w) // 2, y_base + label_padding), + label, + fill="black", + font=font, + ) + + y_thumbnail = y_base + label_padding + font_size + label_padding + + with Image.open(img_path) as img: + img.thumbnail((width, height), Image.Resampling.LANCZOS) + w, h = img.size + tx = x + (width - w) // 2 + ty = y_thumbnail + (height - h) // 2 + grid.paste(img, (tx, ty)) + + if BORDER_WIDTH > 0: + draw.rectangle( + [ + (tx - BORDER_WIDTH, ty - BORDER_WIDTH), + (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1), + ], + outline="gray", + width=BORDER_WIDTH, + ) + + return grid + + +if __name__ == "__main__": + main() diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/vision_qwen.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/vision_qwen.py new file mode 100644 index 00000000..72eef411 --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/vision_qwen.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- +""" +Qwen Vision Tool for PPTX Pro Skill +Provides image description capabilities using Qwen Vision model. 
+ +Usage: + python vision_qwen.py --image path/to/image.png --prompt "Describe this slide" + python vision_qwen.py --images img1.png img2.png img3.png +""" + +import argparse +import requests +import base64 +import os +import sys +import json +import re +import time +from typing import Optional, List + +# Try to import from utils/config.py in parent project, fallback to defaults +try: + # Add parent paths to find config + config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'metierial', 'utils', 'config.py') + if os.path.exists(config_path): + sys.path.insert(0, os.path.dirname(config_path)) + from config import QWEN_KEY, LLM_API_URL + else: + raise ImportError("Config not found") +except ImportError: + # Default API configuration + QWEN_KEY = "sk-1b897ac944044d6aae35ca862a23fbdb" + LLM_API_URL = "https://maas-api.ai-yuanjing.com/openapi/compatible-mode-nosensitive/v1/chat/completions" + + +class QwenVisionTool: + """Qwen Vision API wrapper for image description""" + + def __init__(self): + self.api_key = QWEN_KEY + self.url = LLM_API_URL + + def _prepare_image_data(self, image_path: str) -> Optional[str]: + """Convert image to base64, SVG is converted to PNG first""" + target_path = image_path + temp_png = None + try: + if image_path.lower().endswith(".svg"): + try: + from svglib.svglib import svg2rlg + from reportlab.graphics import renderPM + import tempfile + drawing = svg2rlg(image_path) + fd, temp_png = tempfile.mkstemp(suffix=".png") + os.close(fd) + renderPM.drawToFile(drawing, temp_png, fmt="PNG") + target_path = temp_png + except ImportError: + print("Warning: svglib not installed, cannot convert SVG") + return None + + with open(target_path, "rb") as f: + return base64.b64encode(f.read()).decode() + except Exception as e: + print(f"Error processing image {image_path}: {e}") + return None + finally: + if temp_png and os.path.exists(temp_png): + try: + os.remove(temp_png) + except: + pass + + def describe_images_batch(self, image_paths: 
List[str], prompt: str = "请详细描述这些图片的内容。") -> List[str]: + """ + Batch describe images, up to 5 images per request. + """ + results = [] + # Group by max 5 images + for i in range(0, len(image_paths), 5): + batch = image_paths[i:i+5] + batch_results = self._call_qwen_vision_batch(batch, prompt) + results.extend(batch_results) + # 1 second interval between batches + if i + 5 < len(image_paths): + time.sleep(1) + return results + + def _call_qwen_vision_batch(self, image_paths: List[str], prompt: str) -> List[str]: + """Send a single batch (max 5 images) to Qwen Vision API""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + json_format_instruction = ( + "\n请严格按照以下 JSON 数组格式返回每张图片的描述内容,严禁包含任何额外的 Markdown 标记、解释性文字或示例内容:\n" + "[\"描述1\", \"描述2\", ...]\n" + f"注意:你必须提供正好 {len(image_paths)} 个字符串,每个字符串对应一张图片的详细描述。" + ) + + content = [{"type": "text", "text": f"{prompt} {json_format_instruction}"}] + + for path in image_paths: + img_b64 = self._prepare_image_data(path) + if img_b64: + content.append({ + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img_b64}"}, + }) + else: + content.append({"type": "text", "text": f"[无法读取图片: {os.path.basename(path)}]"}) + + payload = { + "model": "qwen3.5-397b-a17b", + "messages": [{"role": "user", "content": content}], + "stream": False, + "response_format": {"type": "json_object"} + } + + try: + resp = requests.post(self.url, headers=headers, json=payload, timeout=120) + if resp.status_code == 200: + full_text = resp.json()["choices"][0]["message"]["content"] + + # Try to parse JSON + descriptions = [] + try: + start_idx = full_text.find("[") + end_idx = full_text.rfind("]") + if start_idx != -1 and end_idx != -1: + json_str = full_text[start_idx:end_idx+1] + data = json.loads(json_str) + if isinstance(data, list): + descriptions = [str(d) for d in data] + except: + pass + + # Fallback: regex matching + if len(descriptions) < len(image_paths): + matches = 
re.findall(r'"([^"]+)"', full_text) + if len(matches) >= len(image_paths): + descriptions = matches + + # Filter invalid descriptions + invalid_keywords = ["描述", "示例", "Image [", "图片"] + final_descriptions = [] + for d in descriptions: + if len(d) < 20 and any(k in d for k in invalid_keywords): + continue + final_descriptions.append(d) + + if len(final_descriptions) >= len(image_paths): + return final_descriptions[:len(image_paths)] + + # Last resort: split by markers + fallback_descs = [] + parts = re.split(r'Image \[\d+\]:|第\d+张图片:|^\d+\.', full_text) + for p in parts: + clean_p = p.strip() + if len(clean_p) > 10: + fallback_descs.append(clean_p) + + if len(fallback_descs) >= len(image_paths): + return fallback_descs[:len(image_paths)] + + print(f"Warning: Could not extract valid descriptions. Output length: {len(full_text)}") + return [full_text] * len(image_paths) + else: + return [f"API Error: {resp.status_code}"] * len(image_paths) + except Exception as e: + return [f"Request Exception: {e}"] * len(image_paths) + + def describe_image(self, image_path: str, prompt: str = "请详细描述这张图片的内容。") -> str: + """Single image description (backward compatible)""" + return self.describe_images_batch([image_path], prompt)[0] + + +def main(): + parser = argparse.ArgumentParser(description="Qwen Vision Tool for image description") + parser.add_argument("--image", "-i", help="Single image path to describe") + parser.add_argument("--images", "-I", nargs="+", help="Multiple image paths for batch description") + parser.add_argument("--prompt", "-p", default="请详细描述这张/这些图片的内容。", help="Prompt for description") + parser.add_argument("--output", "-o", help="Output file for results (JSON format for batch)") + + args = parser.parse_args() + + tool = QwenVisionTool() + + if args.image: + result = tool.describe_image(args.image, args.prompt) + print(f"\n描述结果:\n{result}") + + elif args.images: + results = tool.describe_images_batch(args.images, args.prompt) + print(f"\n批量描述结果:") + for i, 
(path, desc) in enumerate(zip(args.images, results)): + print(f"\n[{i+1}] {os.path.basename(path)}:") + print(f" {desc[:200]}..." if len(desc) > 200 else f" {desc}") + + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + json.dump(dict(zip(args.images, results)), f, ensure_ascii=False, indent=2) + print(f"\n结果已保存到: {args.output}") + + else: + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/web_search.py b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/web_search.py new file mode 100644 index 00000000..0e3bf06a --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/scripts/web_search.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +""" +Web Search Tool for PPTX Pro Skill +Provides web search and image search capabilities using Tencent Search API. + +Usage: + python web_search.py --query "AI trends 2024" --type text --count 10 + python web_search.py --query "technology background" --type image --count 10 + +Limits: + - Text search: max 3 queries per session + - Image search: max 3 queries per session +""" + +import argparse +import json +import sys +import os +from typing import Dict, Any, Optional + +# Try to import from utils/config.py, fallback to defaults +try: + config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'metierial', 'utils', 'config.py') + if os.path.exists(config_path): + sys.path.insert(0, os.path.dirname(config_path)) + from config import TENCENT_ID, TENCENT_KEY, TENCENT_ENDPOINT, TENCENT_IMG_ENDPOINT + else: + raise ImportError("Config not found") +except ImportError: + # Default API configuration + TENCENT_ID = "TENCENT_SECRET_ID_PLACEHOLDER" + TENCENT_KEY = "TENCENT_SECRET_KEY_PLACEHOLDER" + TENCENT_ENDPOINT = "wsa.tencentcloudapi.com" + TENCENT_IMG_ENDPOINT = "wimgs.tencentcloudapi.com" + + +class TencentSearchTool: + """Tencent Search API wrapper""" + + def __init__(self, endpoint: str = 
TENCENT_ENDPOINT, service: str = "wsa", version: str = "2025-05-08"): + try: + from tencentcloud.common import credential + from tencentcloud.common.profile.client_profile import ClientProfile + from tencentcloud.common.profile.http_profile import HttpProfile + from tencentcloud.common.common_client import CommonClient + + self.cred = credential.Credential(TENCENT_ID, TENCENT_KEY) + httpProfile = HttpProfile() + httpProfile.endpoint = endpoint + clientProfile = ClientProfile() + clientProfile.httpProfile = httpProfile + self.client = CommonClient(service, version, self.cred, "", profile=clientProfile) + self.available = True + except ImportError: + print("Warning: tencentcloud-sdk-python not installed. Search functionality disabled.") + print("Install with: pip install tencentcloud-sdk-python") + self.available = False + + def search(self, query: str, action: str = "SearchPro", params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + if not self.available: + return {"error": "Tencent SDK not available"} + + if params is None: + params = {"Query": query} + + try: + resp = self.client.call_json(action, params) + return resp + except Exception as e: + return {"error": str(e)} + + +# Session limits and cache +_search_cache = set() +_search_count = 0 +_image_search_count = 0 + + +def web_search(query: str, count: int = 10) -> str: + """ + Search the web using Tencent Search API. + Returns passage results. + + Limits: 3 searches per session. + """ + global _search_cache, _search_count + + if _search_count >= 3: + return "Warning: Reached search limit (3). Please use existing information." + + if query in _search_cache: + return "Warning: This query was already executed. Avoid duplicate searches." 
+ + _search_cache.add(query) + _search_count += 1 + + try: + print(f" Searching for: '{query}'...", end="", flush=True) + searcher = TencentSearchTool(endpoint=TENCENT_ENDPOINT, service="wsa", version="2025-05-08") + + params = {"Query": query, "Mode": 0} + if count in [10, 20, 30, 40, 50]: + params["Cnt"] = count + + result = searcher.search(query, action="SearchPro", params=params) + + if "error" in result: + print(f" FAILED: {result['error']}") + return f"Search failed: {result['error']}" + + pages = result.get("Response", {}).get("Pages", []) + formatted_results = [] + for page_str in pages: + try: + page = json.loads(page_str) + title = page.get("title", "No title") + url = page.get("url", "No URL") + passage = page.get("passage", "") + formatted_results.append(f"Title: {title}\nURL: {url}\nSummary: {passage}\n---") + except: + continue + + res_str = "\n".join(formatted_results) if formatted_results else "No results found." + print(f" Found {len(formatted_results)} results.") + return res_str + except Exception as e: + print(f" Exception: {e}") + return f"Search exception: {str(e)}" + + +def image_search(query: str, count: int = 10) -> str: + """ + Search for images using Tencent Image Search API. + Returns image URLs and descriptions. + + Limits: 3 image searches per session. + """ + global _image_search_count + + if _image_search_count >= 3: + return "Warning: Reached image search limit (3). Use found images." 
+ + _image_search_count += 1 + + try: + print(f" Searching images for: '{query}'...", end="", flush=True) + searcher = TencentSearchTool(endpoint=TENCENT_ENDPOINT, service="wsa", version="2025-05-08") + + # Mode=2 includes image results + params = {"Query": query, "Mode": 2, "Cnt": 10} + result = searcher.search(query, action="SearchPro", params=params) + + # Fallback to wimgs endpoint if needed + if "error" in result: + searcher_fallback = TencentSearchTool(endpoint=TENCENT_IMG_ENDPOINT, service="wimgs", version="2022-08-18") + result = searcher_fallback.search(query, action="SearchImage", params={"Query": query}) + + if "error" in result and "InvalidAction" in result["error"]: + result = searcher_fallback.search(query, action="SearchPro", params={"Query": query}) + + if "error" in result: + print(f" FAILED: {result['error']}") + return f"Image search failed: {result['error']}" + + pages = result.get("Response", {}).get("Pages", []) + image_results = [] + for page_str in pages: + try: + page = json.loads(page_str) + imgs = page.get("images", []) + title = page.get("title", "Related image") + for img_url in imgs: + image_results.append(f"Description: {title}\nURL: {img_url}") + except: + continue + + res_str = "\n".join(image_results[:count]) if image_results else "No images found." 
+ print(f" Found {len(image_results)} images.") + return res_str + except Exception as e: + print(f" Exception: {e}") + return f"Image search exception: {str(e)}" + + +def main(): + parser = argparse.ArgumentParser(description="Web Search Tool for PPTX Pro") + parser.add_argument("--query", "-q", required=True, help="Search query") + parser.add_argument("--type", "-t", choices=["text", "image"], default="text", help="Search type") + parser.add_argument("--count", "-c", type=int, default=10, help="Number of results") + parser.add_argument("--output", "-o", help="Output file for results") + + args = parser.parse_args() + + if args.type == "text": + result = web_search(args.query, args.count) + else: + result = image_search(args.query, args.count) + + print(f"\n{result}") + + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + f.write(result) + print(f"\nResults saved to: {args.output}") + + +if __name__ == "__main__": + main() diff --git a/libs/hexagent_demo/backend/skills/pptx-plus-linux/skill.md b/libs/hexagent_demo/backend/skills/pptx-plus-linux/skill.md new file mode 100644 index 00000000..718d6eed --- /dev/null +++ b/libs/hexagent_demo/backend/skills/pptx-plus-linux/skill.md @@ -0,0 +1,427 @@ +--- +name: pptx-plus-linux +description: "处理 .pptx 文件(创建、读取、编辑、合并、拆分)。支持幻灯片生成、图表添加和模板管理。在处理演示文稿、deck 或 slides 时触发。" +license: 专有软件。完整条款请参阅 LICENSE.txt +--- + +# PPTX Plus Skill (Linux) + +## 快速参考 + +| 任务 | 指南 | +| ------------------ | ---------------------------------------- | +| 读取/分析内容 | `python -m markitdown presentation.pptx` | +| 编辑或基于模板创建 | 阅读 [editing.md](editing.md) | +| 从零创建 | 阅读 [pptxgenjs.md](pptxgenjs.md) | +| 可视化检查与 QA | 阅读 [examin.md](examin.md) | +| **添加图表** | 见下方「图表生成」章节 | +| 搜索网络素材 | `python scripts/web_search.py` | + +--- + +## ⚠️ 重要:分批写入策略 + +**使用 PptxGenJS 创建 PPTX 时,务必采用分批写入以避免 token 溢出错误。** + +### 为什么要分批写入? + +生成复杂、视觉效果丰富的演示文稿时,代码可能变得非常长。在单次响应中写入所有幻灯片可能导致 token 溢出错误。**分批写入同一个文件可解决此问题。** + +### 如何分批写入 + +1. 
**严格限制:每批最多 5 张幻灯片** — 5 张是最佳选择。 +2. **一个文件,多次编辑**:所有代码必须写入**同一个 JavaScript 文件**。不要为不同批次创建多个文件。 +3. **增量追加策略**:使用 Edit 工具将新幻灯片代码**追加**到现有文件中。每批应继续使用第一批中定义的同一个 `pres` 对象。 +4. **构建支持增量添加的代码结构:** + +```javascript +// 批次 1:初始化设置 + 幻灯片 1-5 +const pptxgen = require('pptxgenjs') + +let pres = new pptxgen() +pres.layout = 'LAYOUT_16x9' + +// 幻灯片 1-5 代码在此... +// 可选的增量保存检查点 + +// --- 批次 1 结束 --- + +// 批次 2:幻灯片 6-10(继续向同一个 pres 对象添加) +// 更多幻灯片代码... + +// 批次 N:最终保存 +pres.writeFile({ fileName: 'output.pptx' }) +``` + +### 分批写入工作流 + +``` +步骤 1:写入初始设置 + 幻灯片 1-5 → 保存/继续 +步骤 2:写入幻灯片 6-10 → 继续使用同一个 pres 对象 +步骤 3:写入幻灯片 11-15 → 继续 +... +最终:写入幻灯片 N-M + pres.writeFile() +``` + +**请记住:** 你是增量地向同一个 JavaScript 文件添加内容。每批向文件添加更多代码,而不是创建单独的文件。 + +--- + +## 读取内容 + +```bash +# 文本提取 +python -m markitdown presentation.pptx + +# 可视化概览 +python scripts/thumbnail.py presentation.pptx + +# 原始 XML +python scripts/office/unpack.py presentation.pptx unpacked/ +``` + +--- + +## 编辑工作流 + +**完整详情请阅读** **[editing.md](editing.md)。** + +1. 使用 `thumbnail.py` 分析模板 +2. 
解包 → 操作幻灯片 → 编辑内容 → 清理 → 打包 + +--- + +## 图表生成 + +**为演示文稿添加精美图表,让数据可视化更具冲击力。** + +### 图表类型选择指南 + +根据数据特征选择最合适的图表类型: + +| 数据类型 | 推荐图表 | 用途 | +| ------------ | -------------------------------------------------------------------------------------------------------------------- | -------------------------------------------- | +| **时间序列** | `line_chart`, `area_chart` | 趋势、累积变化 | +| **对比** | `bar_chart`, `column_chart` | 类别对比、Top-N 排行 | +| **占比** | `pie_chart`, `treemap_chart` | 整体与部分、层级占比 | +| **相关性** | `scatter_chart`, `dual_axes_chart` | 变量关系、双轴对比 | +| **流程** | `funnel_chart`, `flow_diagram` | 转化漏斗、流程步骤 | +| **分布** | `histogram_chart`, `boxplot_chart`, `violin_chart` | 频率分布、统计分布 | +| **层级** | `organization_chart`, `mind_map` | 组织结构、思维导图 | +| **地理** | `district_map`, `pin_map`, `path_map` | 区域数据、点位、路线 | +| **专项** | `radar_chart`, `liquid_chart`, `word_cloud_chart`, `network_graph`, `sankey_chart`, `venn_chart`, `fishbone_diagram` | 多维对比、进度、词频、网络、流向、交集、因果 | + +### 图表生成方法 + +#### 方法一:图片图表(推荐用于复杂图表) + +生成高质量图表图片,然后插入幻灯片。适合需要精美视觉效果或复杂图表类型。 + +```bash +# 生成图表图片 +node scripts/generate.js '{"tool":"generate_pie_chart","args":{"data":[{"category":"A","value":35},{"category":"B","value":45},{"category":"C","value":20}],"title":"市场份额","theme":"dark"}}' +``` + +返回图表图片 URL,然后在 JavaScript 中使用: + +```javascript +// 在 PptxGenJS 中插入图表图片 +slide.addImage({ + path: '返回的图表URL', + x: 0.5, + y: 1.5, + w: 4.5, + h: 3.5 +}) +``` + +**图表参数规格详见** **[references/](references/)** **目录下的各图表文档。** + +#### 方法二:原生图表(适合简单图表) + +使用 PptxGenJS 内置图表功能,适合快速创建简单柱状图、折线图、饼图。 + +```javascript +// 柱状图 +slide.addChart(pres.charts.BAR, [{ + name: '销售额', + labels: ['Q1', 'Q2', 'Q3', 'Q4'], + values: [4500, 5500, 6200, 7100] +}], { + x: 0.5, + y: 0.6, + w: 6, + h: 3, + barDir: 'col', + showTitle: true, + title: '季度销售', + chartColors: ['0D9488', '14B8A6', '5EEAD4'], + showValue: true, + dataLabelPosition: 'outEnd' +}) + +// 饼图 +slide.addChart(pres.charts.PIE, [{ + name: '份额', + labels: ['A', 'B', '其他'], + values: [35, 
45, 20] +}], { x: 7, y: 1, w: 5, h: 4, showPercent: true }) +``` + +### 方法选择建议 + +| 场景 | 推荐方法 | 原因 | +| -------------------------------- | -------- | ---------------- | +| 简单柱状/折线/饼图 | 原生图表 | 快速、代码简洁 | +| 需要与PPT主题配色统一 | 原生图表 | 可自定义颜色 | +| 复杂图表类型(雷达图、桑基图等) | 图片图表 | 原生不支持 | +| 需要精美视觉效果 | 图片图表 | 更丰富的视觉样式 | +| 需要动态交互 | 原生图表 | 可在PPT中编辑 | +| 暗色主题/特殊样式 | 图片图表 | 支持多种主题 | + +### 图表主题与样式 + +图片图表支持三种主题: + +- `default` - 标准白色背景 +- `dark` - 深色背景,适合深色PPT +- `academy` - 学术风格 + +自定义配色: + +```json +{ + "tool": "generate_column_chart", + "args": { + "data": [...], + "title": "销售数据", + "theme": "dark", + "style": { + "palette": ["#1E2761", "#CADCFC", "#FFFFFF"], + "backgroundColor": "#1a1a2e" + } + } +} +``` + +### 详细图表规格 + +每种图表的完整参数说明,请参阅对应的参考文档: + +- `references/generate_line_chart.md` - 折线图 +- `references/generate_bar_chart.md` - 条形图 +- `references/generate_column_chart.md` - 柱状图 +- `references/generate_pie_chart.md` - 饼图/环图 +- `references/generate_area_chart.md` - 面积图 +- `references/generate_scatter_chart.md` - 散点图 +- `references/generate_radar_chart.md` - 雷达图 +- `references/generate_funnel_chart.md` - 漏斗图 +- `references/generate_treemap_chart.md` - 树图 +- `references/generate_sankey_chart.md` - 桑基图 +- `references/generate_dual_axes_chart.md` - 双轴图 +- 以及其他 15+ 种图表类型 + +--- + +## 网络搜索(腾讯搜索) + +搜索网络内容和图片以丰富你的演示文稿。 + +```bash +# 文本搜索(返回文本段落) +python scripts/web_search.py --query "AI 趋势 2026" --count 10 + +# 图片搜索(返回图片 URL) +python scripts/web_search.py --query "科技背景" --type image --count 10 +``` + +**限制:** + +- 文本搜索:每次会话最多 3 次查询 +- 图片搜索:每次会话最多 3 次查询 + +**使用场景:** + +- 收集事实和数据 +- 寻找设计参考图片 +- 研究主题背景 + +## 从零创建 + +**完整详情请阅读** **[pptxgenjs.md](pptxgenjs.md)。** + +当没有模板或参考演示文稿可用时使用。 + +--- + +## 设计思路 + +**不要创建无聊的幻灯片。** 白底黑字的简单列表无法打动任何人。为每张幻灯片考虑以下设计思路。 + +### 开始之前 + +- **选择大胆、契合内容的配色方案**:配色应为此主题量身设计。如果将你的配色方案换到一个完全不同的演示文稿中仍然"适用",说明你的选择还不够具体。 +- **主次分明而非均等分配**:一种颜色应占主导地位(60-70% 视觉权重),配以 1-2 种辅助色调和一种锐利的强调色。永远不要给所有颜色相等的权重。 +- **深浅对比**:标题 + 结尾幻灯片使用深色背景,内容幻灯片使用浅色背景("三明治"结构)。或者全程使用深色背景以营造高端感。 
+- **坚持一个视觉母题**:选择一个独特的元素并重复使用 —— 圆角图片框、彩色圆形图标、单侧粗边框。在每张幻灯片中贯彻使用。 + +### 配色方案 + +选择与主题匹配的颜色 —— 不要默认使用通用蓝色。以下配色方案供参考: + +| 主题 | 主色 | 辅色 | 强调色 | +| -------------- | ------------------ | ------------------ | ------------------ | +| **午夜高管** | `1E2761`(藏蓝) | `CADCFC`(冰蓝) | `FFFFFF`(白色) | +| **森林苔藓** | `2C5F2D`(森林绿) | `97BC62`(苔藓绿) | `F5F5F5`(奶油色) | +| **珊瑚活力** | `F96167`(珊瑚红) | `F9E795`(金色) | `2F3C7E`(藏蓝) | +| **暖赤陶** | `B85042`(赤陶色) | `E7E8D1`(沙色) | `A7BEAE`(鼠尾草) | +| **海洋渐变** | `065A82`(深海蓝) | `1C7293`(青色) | `21295C`(午夜蓝) | +| **炭灰极简** | `36454F`(炭灰) | `F2F2F2`(灰白) | `212121`(黑色) | +| **青绿信赖** | `028090`(青色) | `00A896`(海泡色) | `02C39A`(薄荷绿) | +| **浆果奶油** | `6D2E46`(浆果色) | `A26769`(玫瑰灰) | `ECE2D0`(奶油色) | +| **鼠尾草宁静** | `84B59F`(鼠尾草) | `69A297`(桉树绿) | `50808E`(板岩灰) | +| **樱桃大胆** | `990011`(樱桃红) | `FCF6F5`(灰白) | `2F3C7E`(藏蓝) | + +### 每张幻灯片 + +**每张幻灯片都需要一个视觉元素** —— 图片、图表、图标或形状。纯文字的幻灯片容易被遗忘。 + +**布局选项:** + +- 双栏(左侧文字,右侧插图) +- 图标 + 文字行(彩色圆圈中的图标,粗体标题,下方描述) +- 2x2 或 2x3 网格(一侧放图片,另一侧放内容块网格) +- 半出血图片(完整的左侧或右侧)配内容覆盖 + +**数据展示:** + +- 大号数据突出(60-72pt 大数字,下方小标签) +- 对比栏(前后对比、优缺点、并排选项) +- 时间线或流程图(编号步骤,箭头) +- **精美图表**(使用图表生成功能,数据可视化更具冲击力) + +**视觉打磨:** + +- 章节标题旁的小彩色圆圈图标 +- 关键数据或标语使用斜体强调文字 + +### 排版 + +**选择有趣的字体搭配** —— 不要默认使用 Arial。选择有个性的标题字体,搭配清晰的正文字体。 + +| 标题字体 | 正文字体 | +| ------------ | ------------- | +| Georgia | Calibri | +| Arial Black | Arial | +| Calibri | Calibri Light | +| Cambria | Calibri | +| Trebuchet MS | Calibri | +| Impact | Arial | +| Palatino | Garamond | +| Consolas | Calibri | + +| 元素 | 字号 | +| ---------- | ------------ | +| 幻灯片标题 | 36-44pt 粗体 | +| 章节标题 | 20-24pt 粗体 | +| 正文文本 | 14-16pt | +| 说明文字 | 10-12pt 弱化 | + +### 间距 + +- 最小边距 0.5 英寸 +- 内容块之间 0.3-0.5 英寸 +- 留出呼吸空间 —— 不要填满每一寸 + +### 避免事项(常见错误) + +- **不要重复使用相同布局** —— 在幻灯片间变化使用栏、卡片和突出显示 +- **正文不要居中** —— 段落和列表左对齐;只有标题居中 +- **不要吝啬字号对比** —— 标题需要 36pt+ 才能与 14-16pt 正文区分 +- **不要默认使用蓝色** —— 选择反映特定主题的颜色 +- **不要随意混合间距** —— 选择 0.3" 或 0.5" 间隙并保持一致 +- **不要只设计一张幻灯片而让其他保持朴素** —— 要么完全投入,要么全程保持简洁 +- **不要创建纯文字幻灯片** —— 
添加图片、图标、图表或视觉元素;避免纯标题 + 列表 +- **不要忘记文本框内边距** —— 当将线条或形状与文本边缘对齐时,在文本框上设置 `margin: 0` 或偏移形状以考虑内边距 +- **不要使用低对比度元素** —— 图标和文字都需要与背景形成强对比;避免浅色背景上的浅色文字或深色背景上的深色文字 +- **绝对不要在标题下使用装饰线** —— 这是 AI 生成幻灯片的标志;改用留白或背景色 +- **绝对不要在 JavaScript 中使用中文引号(如:" ")** —— 会导致 PptxGenJS 崩溃或生成损坏文件;始终使用标准 ASCII 引号(`' '` 或 `" "`) + +### 设计质量检查清单 + +创建完幻灯片后,对照以下清单进行自我检查: + +**布局与对齐** + +- [ ] 图片、表格、图表是否对齐(底部或顶部对齐)? +- [ ] 文字块之间间距是否一致(统一使用 0.3" 或 0.5")? +- [ ] 是否避免了"后加"的感觉——底部内容是否与整体融为一体? + +**视觉层次** + +- [ ] 标题字号是否足够大(36pt+)与正文区分? +- [ ] 是否有清晰的视觉焦点(主图、核心数据、关键结论)? +- [ ] 信息密度是否适中——既不拥挤也不空洞? + +**图表与图片** + +- [ ] 图表颜色是否与整体配色方案协调? +- [ ] 图表是否与文物/照片风格统一? +- [ ] 图表是否放置在合适的位置——不是孤立在角落? + +**内容完整性** + +- [ ] 每张幻灯片是否有明确的单一主题? +- [ ] 数据是否有来源标注? +- [ ] 结论是否清晰可见? + +**可视化检查** + +```bash +# 生成缩略图检查整体效果 +python scripts/ppt_to_pic.py --file presentation.pptx --output thumbnails + +# 使用 Qwen 视觉分析 +python scripts/vision_qwen.py --image thumbnails/slide1.PNG --prompt "分析这张幻灯片的设计质量和改进建议" +``` + +--- + +## 依赖项 + +**核心依赖:** + +- `pip install "markitdown[pptx]"` - 文本提取 +- `pip install Pillow` - 缩略图网格 +- `npm install -g pptxgenjs` - 从零创建 +- LibreOffice (`soffice`) - PDF 转换(Linux) +- Poppler (`pdftoppm`) - PDF 转图片 + +**可视化工具:** + +- `pip install tencentcloud-sdk-python` - 腾讯搜索 API +- `pip install svglib reportlab` - SVG 转 PNG,用于视觉工具 + +**图表生成:** + +- Node.js >= 18.0.0 - 运行图表生成脚本 + +--- + +## 环境设置 + +为获得可视化工具和网络搜索的最佳效果: + +```bash +# 验证工具工作正常 +python scripts/web_search.py --query "test" --count 1 + +# 验证图表生成 +node scripts/generate.js '{"tool":"generate_column_chart","args":{"data":[{"category":"测试","value":100}],"title":"测试图表"}}' + +# 验证 LibreOffice 安装 +soffice --version + +# 验证 pdftoppm 安装 +pdftoppm -v +``` diff --git a/libs/hexagent_demo/electron/main.js b/libs/hexagent_demo/electron/main.js index c31b4f6a..fa2bca61 100644 --- a/libs/hexagent_demo/electron/main.js +++ b/libs/hexagent_demo/electron/main.js @@ -416,6 +416,55 @@ try { }; }); +ipcMain.handle("restart-windows-now", async () => { + if (process.platform 
!== "win32") { + return { ok: false, message: "This action is only available on Windows." }; + } + + // First try a normal restart request. + let res = await runCommand("shutdown.exe", ["/r", "/t", "0"]); + if (res.code === 0) { + return { ok: true, message: "Windows restart has been triggered." }; + } + + // Fallback with elevation prompt when policy/permissions block direct call. + const psScript = ` +$ErrorActionPreference = 'Stop' +try { + Start-Process -FilePath shutdown.exe -ArgumentList @('/r','/t','0') -Verb RunAs + exit 0 +} catch { + $msg = $_.Exception.Message + if ([string]::IsNullOrWhiteSpace($msg)) { $msg = "Unknown restart failure." } + Write-Output ("RESTART_ERR:" + $msg) + exit 1 +} +`.trim(); + + res = await runCommand("powershell.exe", [ + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-Command", + psScript, + ]); + + if (res.code === 0) { + return { ok: true, message: "Windows restart has been triggered." }; + } + + const combined = `${res.stderr || ""}\n${res.stdout || ""}`.trim(); + const restartErr = (combined.match(/RESTART_ERR:(.*)/) || [null, ""])[1]?.trim(); + const cancelled = /canceled|cancelled|拒绝|已取消|denied/i.test(combined); + if (cancelled) { + return { ok: false, message: "Restart was cancelled." 
}; + } + return { + ok: false, + message: restartErr || combined || `Failed to trigger restart (exit ${res.code}).`, + }; +}); + // ── Window ─────────────────────────────────────────────────────────────────── function createWindow() { diff --git a/libs/hexagent_demo/electron/preload.js b/libs/hexagent_demo/electron/preload.js index 279d4ef0..4c2a8b6a 100644 --- a/libs/hexagent_demo/electron/preload.js +++ b/libs/hexagent_demo/electron/preload.js @@ -7,4 +7,5 @@ contextBridge.exposeInMainWorld("electronAPI", { platform: process.platform, checkWslPrerequisites: () => ipcRenderer.invoke("check-wsl-prerequisites"), installWslRuntime: () => ipcRenderer.invoke("install-wsl-runtime"), + restartWindowsNow: () => ipcRenderer.invoke("restart-windows-now"), }); diff --git a/libs/hexagent_demo/electron/scripts/build-backend.ps1 b/libs/hexagent_demo/electron/scripts/build-backend.ps1 index 16cbd8e2..f6f0dc05 100644 --- a/libs/hexagent_demo/electron/scripts/build-backend.ps1 +++ b/libs/hexagent_demo/electron/scripts/build-backend.ps1 @@ -3,6 +3,16 @@ $ErrorActionPreference = "Stop" $ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path $ElectronDir = Resolve-Path "$ScriptDir\.." $BackendDir = Resolve-Path "$ElectronDir\..\backend" +$ConfigSource = Join-Path $BackendDir "config.json" +$TempConfigCreated = $false + +if (-not (Test-Path $ConfigSource)) { + # Keep packaging resilient in CI/local envs where config.json is not present. + # Electron will still seed userData/config.json from this bundled default. + $ConfigSource = Join-Path $BackendDir ".packaged-default-config.json" + Set-Content -Path $ConfigSource -Value "{}" -Encoding UTF8 + $TempConfigCreated = $true +} Write-Host "==> Installing PyInstaller..." 
Set-Location $BackendDir @@ -31,6 +41,7 @@ $pyinstallerArgs = @( "--collect-data", "hexagent", "--add-data", "../../hexagent/sandbox/vm;sandbox/vm", "--add-data", "skills;skills", + "--add-data", "$ConfigSource;.", "hexagent_api/server.py" ) @@ -39,23 +50,30 @@ if (-not (Test-Path "$BackendDir\skills")) { New-Item -ItemType Directory -Path "$BackendDir\skills" | Out-Null } -if (Get-Command uv -ErrorAction SilentlyContinue) { - uv pip install pyinstaller - Write-Host "==> Building backend with PyInstaller (uv)..." - uv run pyinstaller @pyinstallerArgs -} else { - $venvPython = Join-Path $BackendDir ".venv\Scripts\python.exe" - if (-not (Test-Path $venvPython)) { - throw "uv not found and backend venv python missing: $venvPython" +try { + if (Get-Command uv -ErrorAction SilentlyContinue) { + uv pip install pyinstaller + Write-Host "==> Building backend with PyInstaller (uv)..." + uv run pyinstaller @pyinstallerArgs + } else { + $venvPython = Join-Path $BackendDir ".venv\Scripts\python.exe" + if (-not (Test-Path $venvPython)) { + throw "uv not found and backend venv python missing: $venvPython" + } + Write-Host "==> uv not found, using backend venv python fallback..." + & $venvPython -m PyInstaller @pyinstallerArgs } - Write-Host "==> uv not found, using backend venv python fallback..." - & $venvPython -m PyInstaller @pyinstallerArgs -} -Write-Host "==> Copying dist to electron/backend_dist..." -if (Test-Path "$ElectronDir\backend_dist") { - Remove-Item -Recurse -Force "$ElectronDir\backend_dist" -} -Copy-Item -Recurse "$BackendDir\dist\hexagent_api_server" "$ElectronDir\backend_dist" + Write-Host "==> Copying dist to electron/backend_dist..." + if (Test-Path "$ElectronDir\backend_dist") { + Remove-Item -Recurse -Force "$ElectronDir\backend_dist" + } + Copy-Item -Recurse "$BackendDir\dist\hexagent_api_server" "$ElectronDir\backend_dist" -Write-Host "==> Backend build complete." + Write-Host "==> Backend build complete." 
+} +finally { + if ($TempConfigCreated -and (Test-Path $ConfigSource)) { + Remove-Item -Force $ConfigSource + } +} diff --git a/libs/hexagent_demo/electron/scripts/build-backend.sh b/libs/hexagent_demo/electron/scripts/build-backend.sh index 10661e62..697df61a 100755 --- a/libs/hexagent_demo/electron/scripts/build-backend.sh +++ b/libs/hexagent_demo/electron/scripts/build-backend.sh @@ -12,6 +12,23 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" ELECTRON_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" BACKEND_DIR="$(cd "$ELECTRON_DIR/../backend" && pwd)" TARGET_ARCH="${1:-}" +CONFIG_SOURCE="$BACKEND_DIR/config.json" +TEMP_CONFIG_CREATED=0 + +if [ ! -f "$CONFIG_SOURCE" ]; then + # Keep packaging resilient in CI/local envs where config.json is not present. + # Electron will still seed userData/config.json from this bundled default. + CONFIG_SOURCE="$BACKEND_DIR/.packaged-default-config.json" + printf '{}\n' > "$CONFIG_SOURCE" + TEMP_CONFIG_CREATED=1 +fi + +cleanup_temp_config() { + if [ "$TEMP_CONFIG_CREATED" = "1" ] && [ -f "$CONFIG_SOURCE" ]; then + rm -f "$CONFIG_SOURCE" + fi +} +trap cleanup_temp_config EXIT # ── PyInstaller flags (shared) ── PYINSTALLER_ARGS=( @@ -40,6 +57,7 @@ PYINSTALLER_ARGS=( --collect-data hexagent --add-data "skills:skills" --add-data "../../hexagent/sandbox/vm:sandbox/vm" + --add-data "$CONFIG_SOURCE:." 
hexagent_api/server.py ) diff --git a/libs/hexagent_demo/frontend/src/components/RestartRequiredModal.tsx b/libs/hexagent_demo/frontend/src/components/RestartRequiredModal.tsx index 0cfc1f25..e2d42789 100644 --- a/libs/hexagent_demo/frontend/src/components/RestartRequiredModal.tsx +++ b/libs/hexagent_demo/frontend/src/components/RestartRequiredModal.tsx @@ -1,6 +1,7 @@ -import { AlertTriangle, RefreshCw, Settings } from "lucide-react"; +import { AlertTriangle, Loader2, Settings } from "lucide-react"; +import { useState } from "react"; import { useTranslation } from "react-i18next"; -import { useVMSetup } from "../vmSetup"; +import { useAppContext } from "../store"; interface RestartRequiredModalProps { open: boolean; @@ -14,7 +15,30 @@ export default function RestartRequiredModal({ onOpenSettings, }: RestartRequiredModalProps) { const { t } = useTranslation("misc"); - const vm = useVMSetup(); + const { dispatch } = useAppContext(); + const [restarting, setRestarting] = useState(false); + + const handleRestartNow = async () => { + if (restarting) return; + const confirmed = window.confirm(t("restartRequired.confirmRestartNow")); + if (!confirmed) return; + + setRestarting(true); + try { + const api = window.electronAPI?.restartWindowsNow; + if (!api) { + throw new Error(t("restartRequired.restartNotSupported")); + } + const res = await api(); + if (!res?.ok) { + throw new Error(res?.message || t("restartRequired.restartFailed")); + } + } catch (e: unknown) { + const msg = e instanceof Error ? e.message : t("restartRequired.restartFailed"); + dispatch({ type: "SHOW_NOTIFICATION", payload: { message: msg, type: "error" } }); + setRestarting(false); + } + }; if (!open) return null; @@ -39,9 +63,9 @@ export default function RestartRequiredModal({ {t("restartRequired.openSandboxSettings")} - - - {t("restartRequired.recheck")} + + {restarting ? : null} + {restarting ? 
t("restartRequired.restarting") : t("restartRequired.restartNow")} diff --git a/libs/hexagent_demo/frontend/src/components/SettingsModal.tsx b/libs/hexagent_demo/frontend/src/components/SettingsModal.tsx index 59253023..bcd765db 100644 --- a/libs/hexagent_demo/frontend/src/components/SettingsModal.tsx +++ b/libs/hexagent_demo/frontend/src/components/SettingsModal.tsx @@ -13,7 +13,7 @@ import type { PhaseStatus } from "../vmSetup"; /** Available languages. Add new entries here to support more languages. */ const LANGUAGES = [ { code: "en", label: "English" }, - { code: "zh-CN", label: "绠€浣撲腑鏂? }, + { code: "zh-CN", label: "Chinese (Simplified)" }, ] as const; interface SettingsModalProps { diff --git a/libs/hexagent_demo/frontend/src/components/WelcomeScreen.tsx b/libs/hexagent_demo/frontend/src/components/WelcomeScreen.tsx index 91797da3..a35c4648 100644 --- a/libs/hexagent_demo/frontend/src/components/WelcomeScreen.tsx +++ b/libs/hexagent_demo/frontend/src/components/WelcomeScreen.tsx @@ -386,7 +386,7 @@ export default function WelcomeScreen({ onSubmit, mode, onOpenSettings }: Welcom )} {sandboxBlocked && ( - {missingE2bKey ? t("e2bKeyRequired") : t("vmSetupRequired")} 鈥攞" "} + {missingE2bKey ? 
t("e2bKeyRequired") : t("vmSetupRequired")}{" - "} onOpenSettings("sandbox")}> {t("setupInSettings")} diff --git a/libs/hexagent_demo/frontend/src/electron.d.ts b/libs/hexagent_demo/frontend/src/electron.d.ts index ed453916..c665b9f3 100644 --- a/libs/hexagent_demo/frontend/src/electron.d.ts +++ b/libs/hexagent_demo/frontend/src/electron.d.ts @@ -28,6 +28,10 @@ declare global { stdout?: string; stderr?: string; }>; + restartWindowsNow?: () => Promise<{ + ok: boolean; + message?: string; + }>; }; } } diff --git a/libs/hexagent_demo/frontend/src/locales/en/misc.json b/libs/hexagent_demo/frontend/src/locales/en/misc.json index 1453eb3b..45bdbc4f 100644 --- a/libs/hexagent_demo/frontend/src/locales/en/misc.json +++ b/libs/hexagent_demo/frontend/src/locales/en/misc.json @@ -22,7 +22,12 @@ "wslComplete": "WSL runtime installation is complete, but Windows must restart before OpenAgent can continue.", "pleaseRestart": "Please restart your computer now, otherwise VM/Cowork features will not work.", "openSandboxSettings": "Open Sandbox Settings", - "recheck": "I have restarted, Re-check" + "recheck": "I have restarted, Re-check", + "restartNow": "Restart Now", + "restarting": "Restarting...", + "confirmRestartNow": "Restart Windows now? Unsaved work in other apps may be lost.", + "restartNotSupported": "Immediate restart is only available in the desktop app on Windows.", + "restartFailed": "Failed to trigger Windows restart." 
}, "vmSetup": { "settingUp": "Setting up VM", diff --git a/libs/hexagent_demo/frontend/src/locales/zh-CN/misc.json b/libs/hexagent_demo/frontend/src/locales/zh-CN/misc.json index ab5e58e8..f4e26a88 100644 --- a/libs/hexagent_demo/frontend/src/locales/zh-CN/misc.json +++ b/libs/hexagent_demo/frontend/src/locales/zh-CN/misc.json @@ -22,7 +22,12 @@ "wslComplete": "WSL 运行时安装已完成,但 Windows 需要重启才能继续使用 OpenAgent。", "pleaseRestart": "请立即重启计算机,否则虚拟机/协作功能将无法使用。", "openSandboxSettings": "打开沙盒设置", - "recheck": "已重启,重新检查" + "recheck": "已重启,重新检查", + "restartNow": "立即重启", + "restarting": "正在重启...", + "confirmRestartNow": "确认立即重启 Windows 吗?其他应用中未保存的内容可能会丢失。", + "restartNotSupported": "仅 Windows 桌面客户端支持立即重启。", + "restartFailed": "触发 Windows 重启失败。" }, "vmSetup": { "settingUp": "正在配置虚拟机",