diff --git a/Dockerfile b/Dockerfile index 996fe9e91..8f8f55e5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,11 @@ RUN npm install -g pnpm RUN npm install -g openclaw@2026.2.3 \ && openclaw --version +# Install Python3 + pip, jq, and skill dependencies (pdf-tools, excel, docx handling) +RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip jq \ + && pip3 install pdfplumber PyPDF2 openpyxl python-docx \ + && rm -rf /var/lib/apt/lists/* + # Create OpenClaw directories # Legacy .clawdbot paths are kept for R2 backup migration RUN mkdir -p /root/.openclaw \ @@ -32,12 +37,13 @@ RUN mkdir -p /root/.openclaw \ && mkdir -p /root/clawd/skills # Copy startup script -# Build cache bust: 2026-02-11-v30-rclone +# Build cache bust: 2026-02-16-v35-strip-suspect-config COPY start-openclaw.sh /usr/local/bin/start-openclaw.sh RUN chmod +x /usr/local/bin/start-openclaw.sh -# Copy custom skills +# Copy custom skills and ensure scripts are executable COPY skills/ /root/clawd/skills/ +RUN find /root/clawd/skills -name "*.sh" -exec chmod +x {} \; # Set working directory WORKDIR /root/clawd diff --git a/skills/cloudflare-browser/SKILL.md b/skills/cloudflare-browser/SKILL.md index 0c89c4b39..7d9584558 100644 --- a/skills/cloudflare-browser/SKILL.md +++ b/skills/cloudflare-browser/SKILL.md @@ -1,6 +1,6 @@ --- name: cloudflare-browser -description: Control headless Chrome via Cloudflare Browser Rendering CDP WebSocket. Use for screenshots, page navigation, scraping, and video capture when browser automation is needed in a Cloudflare Workers environment. Requires CDP_SECRET env var and cdpUrl configured in browser.profiles. +description: Web search, page fetching, screenshots, and browser automation via Cloudflare Browser Rendering CDP. Use search.js for web research, fetch.js to read web pages, screenshot.js for visual capture. Requires CDP_SECRET and WORKER_URL env vars. 
--- # Cloudflare Browser Rendering @@ -10,87 +10,74 @@ Control headless browsers via Cloudflare's Browser Rendering service using CDP ( ## Prerequisites - `CDP_SECRET` environment variable set -- Browser profile configured in openclaw.json with `cdpUrl` pointing to the worker endpoint: - ```json - "browser": { - "profiles": { - "cloudflare": { - "cdpUrl": "https://your-worker.workers.dev/cdp?secret=..." - } - } - } - ``` - -## Quick Start +- `WORKER_URL` environment variable set (e.g. `https://your-worker.workers.dev`) -### Screenshot -```bash -node /path/to/skills/cloudflare-browser/scripts/screenshot.js https://example.com output.png -``` +## Web Research -### Multi-page Video +### Search the web ```bash -node /path/to/skills/cloudflare-browser/scripts/video.js "https://site1.com,https://site2.com" output.mp4 +node skills/cloudflare-browser/scripts/search.js "santa barbara county ag enterprise ordinance 2024" +node skills/cloudflare-browser/scripts/search.js "sta rita hills vineyard comparable sales" --max 15 +node skills/cloudflare-browser/scripts/search.js "qualified opportunity zone map california" --json ``` -## CDP Connection Pattern +Returns markdown-formatted results with title, URL, and snippet. Use `--json` for structured output. Use `--max N` to control result count. -The worker creates a page target automatically on WebSocket connect. 
Listen for Target.targetCreated event to get the targetId: - -```javascript -const WebSocket = require('ws'); -const CDP_SECRET = process.env.CDP_SECRET; -const WS_URL = `wss://your-worker.workers.dev/cdp?secret=${encodeURIComponent(CDP_SECRET)}`; - -const ws = new WebSocket(WS_URL); -let targetId = null; - -ws.on('message', (data) => { - const msg = JSON.parse(data.toString()); - if (msg.method === 'Target.targetCreated' && msg.params?.targetInfo?.type === 'page') { - targetId = msg.params.targetInfo.targetId; - } -}); +### Fetch a web page (read articles, docs, regulations) +```bash +node skills/cloudflare-browser/scripts/fetch.js https://example.com +node skills/cloudflare-browser/scripts/fetch.js https://county-code.example.com/chapter-35 --save data-room/02-zoning-planning/ch35-text.md +node skills/cloudflare-browser/scripts/fetch.js https://example.com --html ``` -## Key CDP Commands +Extracts clean text content from web pages. Strips nav, footer, ads. Use `--save` to write directly to a file. Use `--html` for raw HTML (tables, structured data). -| Command | Purpose | -|---------|---------| -| Page.navigate | Navigate to URL | -| Page.captureScreenshot | Capture PNG/JPEG | -| Runtime.evaluate | Execute JavaScript | -| Emulation.setDeviceMetricsOverride | Set viewport size | +### Research workflow +1. **Search** for a topic → get URLs +2. **Fetch** the most relevant pages → get content +3. 
**File** findings in the data room with source attribution -## Common Patterns +## Visual Capture -### Navigate and Screenshot -```javascript -await send('Page.navigate', { url: 'https://example.com' }); -await new Promise(r => setTimeout(r, 3000)); // Wait for render -const { data } = await send('Page.captureScreenshot', { format: 'png' }); -fs.writeFileSync('out.png', Buffer.from(data, 'base64')); +### Screenshot +```bash +node skills/cloudflare-browser/scripts/screenshot.js https://example.com output.png ``` -### Scroll Page -```javascript -await send('Runtime.evaluate', { expression: 'window.scrollBy(0, 300)' }); +### Multi-page Video +```bash +node skills/cloudflare-browser/scripts/video.js "https://site1.com,https://site2.com" output.mp4 ``` -### Set Viewport +## CDP Client Library + +For custom scripts, import the reusable CDP client: + ```javascript -await send('Emulation.setDeviceMetricsOverride', { - width: 1280, - height: 720, - deviceScaleFactor: 1, - mobile: false -}); +const { createClient } = require('./cdp-client'); +const client = await createClient(); +await client.navigate('https://example.com'); +const text = await client.getText(); +const html = await client.getHTML(); +await client.evaluate('document.title'); +const screenshot = await client.screenshot(); +client.close(); ``` -## Creating Videos - -1. Capture frames as PNGs during navigation -2. 
Use ffmpeg to stitch: `ffmpeg -framerate 10 -i frame_%04d.png -c:v libx264 -pix_fmt yuv420p output.mp4`
+### Client Methods
+
+| Method | Purpose |
+|--------|---------|
+| `navigate(url, waitMs)` | Navigate to URL, wait for render |
+| `getText()` | Get page text content |
+| `getHTML()` | Get full page HTML |
+| `evaluate(expr)` | Run JavaScript on page |
+| `screenshot(format)` | Capture PNG/JPEG |
+| `click(selector)` | Click an element |
+| `type(selector, text)` | Type into an input |
+| `scroll(pixels)` | Scroll the page |
+| `setViewport(w, h)` | Set viewport dimensions |
+| `close()` | Close the connection |
 ## Troubleshooting
diff --git a/skills/cloudflare-browser/scripts/fetch.js b/skills/cloudflare-browser/scripts/fetch.js
new file mode 100644
index 000000000..4aef196a7
--- /dev/null
+++ b/skills/cloudflare-browser/scripts/fetch.js
@@ -0,0 +1,104 @@
+#!/usr/bin/env node
+/**
+ * Cloudflare Browser Rendering - Web Page Fetch
+ *
+ * Navigates to a URL and extracts the text content. Use for reading
+ * articles, documentation, county code sections, regulatory pages, etc.
+ *
+ * Usage: node fetch.js <url> [--html] [--save output.md]
+ *
+ * Default output: cleaned text content (innerText).
+ * With --html: raw HTML (for structured content like tables).
+ * With --save: writes to file instead of stdout; missing parent
+ * directories are created.
+ */
+
+const { createClient } = require('./cdp-client');
+const fs = require('fs');
+const path = require('path');
+
+const USAGE = 'Usage: node fetch.js <url> [--html] [--save output.md]';
+
+const args = process.argv.slice(2);
+const htmlMode = args.includes('--html');
+const saveIdx = args.indexOf('--save');
+const savePath = saveIdx !== -1 ? args[saveIdx + 1] : null;
+// The value following --save is a file path, never the URL to fetch.
+const url = args.find((a, i) => (saveIdx === -1 || i !== saveIdx + 1) && /^https?:\/\//i.test(a));
+
+// --save given without a usable path: fail loudly instead of silently
+// falling back to stdout.
+const saveArgMissing = saveIdx !== -1 && (!savePath || savePath.startsWith('--'));
+
+if (!url || saveArgMissing) {
+  console.error(USAGE);
+  process.exit(1);
+}
+
+/**
+ * Fetches `url` through the CDP client and prints or saves its content.
+ * Exits the process with status 1 when nothing could be extracted or the
+ * client throws.
+ */
+async function fetchPage() {
+  const client = await createClient();
+
+  try {
+    await client.navigate(url, 5000);
+
+    let content;
+    if (htmlMode) {
+      content = await client.getHTML();
+    } else {
+      // Extract clean text, removing nav/footer/script noise
+      const result = await client.evaluate(`
+        (() => {
+          // Remove noisy elements
+          ['nav', 'footer', 'header', 'script', 'style', 'noscript', '.cookie-banner', '.ad', '#cookie-consent']
+            .forEach(sel => document.querySelectorAll(sel).forEach(el => el.remove()));
+
+          // Get main content if available, otherwise body
+          const main = document.querySelector('main, article, [role="main"], .content, #content');
+          const source = main || document.body;
+
+          // Get text and clean up whitespace
+          return source.innerText
+            .replace(/\\n{3,}/g, '\\n\\n')
+            .trim();
+        })()
+      `);
+      content = result.result?.value || '';
+    }
+
+    if (!content) {
+      console.error('No content extracted from:', url);
+      client.close();
+      process.exit(1);
+    }
+
+    // Add source header
+    const output = `# Source: ${url}\n\n${content}`;
+
+    if (savePath) {
+      const fullPath = path.resolve(savePath);
+      // Create the target directory first; writeFileSync does not.
+      fs.mkdirSync(path.dirname(fullPath), { recursive: true });
+      fs.writeFileSync(fullPath, output);
+      console.log(`Saved ${(Buffer.byteLength(output) / 1024).toFixed(1)} KB to ${fullPath}`);
+    } else {
+      console.log(output);
+    }
+
+    client.close();
+  } catch (err) {
+    console.error('Fetch error:', err.message);
+    client.close();
+    process.exit(1);
+  }
+}
+
+// createClient() rejects outside the try block above; report it the same way.
+fetchPage().catch((err) => {
+  console.error('Fetch error:', err.message);
+  process.exit(1);
+});
diff --git a/skills/cloudflare-browser/scripts/search.js b/skills/cloudflare-browser/scripts/search.js
new file mode 100644
index 000000000..0a73d7ab4
--- /dev/null
+++ b/skills/cloudflare-browser/scripts/search.js
@@ -0,0 +1,103 @@
+#!/usr/bin/env node
+/**
+ * Cloudflare Browser Rendering - Web Search
+ *
+ * Uses DuckDuckGo HTML (no JS required, no captchas) to search the web
+ * and
return structured results the agent can use for research.
+ *
+ * Usage: node search.js "query" [--max 10] [--json]
+ *
+ * Output: Markdown-formatted search results with title, URL, and snippet.
+ * With --json: JSON array of {title, url, snippet} objects.
+ */
+
+const { createClient } = require('./cdp-client');
+
+// --max falls back to 10 when absent or not a number; flags are stripped by index so a query word equal to the --max value is kept.
+const args = process.argv.slice(2);
+const jsonMode = args.includes('--json');
+const maxIdx = args.indexOf('--max');
+const maxResults = Math.max(1, (maxIdx !== -1 && parseInt(args[maxIdx + 1], 10)) || 10);
+const query = args.filter((a, i) => a !== '--json' && a !== '--max' && (maxIdx === -1 || i !== maxIdx + 1)).join(' ');
+
+if (!query) {
+  console.error('Usage: node search.js "search query" [--max 10] [--json]');
+  process.exit(1);
+}
+
+async function search() {
+  const client = await createClient();
+
+  try {
+    // DuckDuckGo HTML version - lightweight, no JS needed, no captchas
+    const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
+    await client.navigate(searchUrl, 4000);
+
+    // Extract results via DOM; filter BEFORE slicing so --max counts usable results
+    const resultData = await client.evaluate(`
+      JSON.stringify(
+        Array.from(document.querySelectorAll('.result')).map(r => {
+          const link = r.querySelector('.result__a');
+          const snippet = r.querySelector('.result__snippet');
+          const urlEl = r.querySelector('.result__url');
+          return {
+            title: link ? link.innerText.trim() : '',
+            url: link ? link.href : (urlEl ? urlEl.innerText.trim() : ''),
+            snippet: snippet ? snippet.innerText.trim() : '',
+          };
+        }).filter(r => r.title && r.url).slice(0, ${maxResults})
+      )
+    `);
+
+    const results = JSON.parse(resultData.result?.value || '[]');
+
+    if (results.length === 0) {
+      // Fallback: any non-DuckDuckGo links on the page (filtered first, then capped)
+      const fallback = await client.evaluate(`
+        JSON.stringify(
+          Array.from(document.querySelectorAll('a[href]')).map(a => ({
+            title: a.innerText.trim(),
+            url: a.href,
+            snippet: ''
+          })).filter(r => r.title && r.url && !r.url.includes('duckduckgo')).slice(0, ${maxResults})
+        )
+      `);
+      const fallbackResults = JSON.parse(fallback.result?.value || '[]');
+
+      if (fallbackResults.length === 0) {
+        console.error('No results found for:', query);
+        client.close();
+        process.exit(1);
+      }
+
+      outputResults(fallbackResults);
+    } else {
+      outputResults(results);
+    }
+
+    client.close();
+  } catch (err) {
+    console.error('Search error:', err.message);
+    client.close();
+    process.exit(1);
+  }
+}
+
+function outputResults(results) {
+  if (jsonMode) {
+    console.log(JSON.stringify(results, null, 2));
+  } else {
+    console.log(`## Search: "${query}"\n`);
+    console.log(`Found ${results.length} results\n`);
+    results.forEach((r, i) => {
+      console.log(`### ${i + 1}. ${r.title}`);
+      console.log(`> ${r.url}`);
+      if (r.snippet) {
+        console.log(`\n${r.snippet}`);
+      }
+      console.log('');
+    });
+  }
+}
+
+search().catch((err) => { console.error('Search error:', err.message); process.exit(1); });
diff --git a/src/gateway/sync.test.ts b/src/gateway/sync.test.ts
index 054bcd3ec..741fcb3c7 100644
--- a/src/gateway/sync.test.ts
+++ b/src/gateway/sync.test.ts
@@ -137,6 +137,50 @@ describe('syncToR2', () => {
       expect(configCmd).toContain('r2:moltbot-data/openclaw/');
     });
+    it('syncs workspace and skills under openclaw/ prefix', async () => {
+      const { sandbox, execMock } = createMockSandbox();
+      execMock
+        .mockResolvedValueOnce(createMockExecResult('yes'))
+        .mockResolvedValueOnce(createMockExecResult('openclaw'))
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult('2026-01-27'));
+
+      const env = createMockEnvWithR2();
+      await syncToR2(sandbox, env);
+
+      // Workspace sync (call index 3) must target openclaw/workspace/
+      const workspaceCmd = execMock.mock.calls[3][0];
+      expect(workspaceCmd).toContain('r2:moltbot-data/openclaw/workspace/');
+      expect(workspaceCmd).not.toMatch(/r2:moltbot-data\/workspace\//);
+
+      // Skills sync (call index 4) must target openclaw/skills/
+      const skillsCmd = execMock.mock.calls[4][0];
+      expect(skillsCmd).toContain('r2:moltbot-data/openclaw/skills/');
+      expect(skillsCmd).not.toMatch(/r2:moltbot-data\/skills\/[^.]/);
+    });
+
+    it('config sync excludes workspace and skills subdirectories', async () => {
+      const { sandbox, execMock } = createMockSandbox();
+      execMock
+        .mockResolvedValueOnce(createMockExecResult('yes'))
+        .mockResolvedValueOnce(createMockExecResult('openclaw'))
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+        .mockResolvedValueOnce(createMockExecResult())
+
.mockResolvedValueOnce(createMockExecResult('2026-01-27')); + + const env = createMockEnvWithR2(); + await syncToR2(sandbox, env); + + const configCmd = execMock.mock.calls[2][0]; + expect(configCmd).toContain("--exclude='workspace/**'"); + expect(configCmd).toContain("--exclude='skills/**'"); + }); + it('uses custom bucket name', async () => { const { sandbox, execMock } = createMockSandbox(); execMock diff --git a/src/gateway/sync.ts b/src/gateway/sync.ts index 99a2f6498..fd0d7d112 100644 --- a/src/gateway/sync.ts +++ b/src/gateway/sync.ts @@ -52,8 +52,9 @@ export async function syncToR2(sandbox: Sandbox, env: MoltbotEnv): Promise rcloneRemote(env, prefix); // Sync config (rclone sync propagates deletions) + // Exclude workspace/ and skills/ subdirs so they aren't deleted by the config sync const configResult = await sandbox.exec( - `rclone sync ${configDir}/ ${remote('openclaw/')} ${RCLONE_FLAGS} --exclude='*.lock' --exclude='*.log' --exclude='*.tmp' --exclude='.git/**'`, + `rclone sync ${configDir}/ ${remote('openclaw/')} ${RCLONE_FLAGS} --exclude='*.lock' --exclude='*.log' --exclude='*.tmp' --exclude='.git/**' --exclude='workspace/**' --exclude='skills/**'`, { timeout: 120000 }, ); if (!configResult.success) { @@ -64,15 +65,15 @@ export async function syncToR2(sandbox: Sandbox, env: MoltbotEnv): Promise /dev/null 2>&1; then echo "OpenClaw gateway is already running, exiting." @@ -62,13 +63,14 @@ if r2_configured; then echo "Checking R2 for existing backup..." # Check if R2 has an openclaw config backup + # IMPORTANT: exclude workspace/ and skills/ from config restore — they're restored separately below if rclone ls "r2:${R2_BUCKET}/openclaw/openclaw.json" $RCLONE_FLAGS 2>/dev/null | grep -q openclaw.json; then echo "Restoring config from R2..." - rclone copy "r2:${R2_BUCKET}/openclaw/" "$CONFIG_DIR/" $RCLONE_FLAGS -v 2>&1 || echo "WARNING: config restore failed with exit code $?" 
+        rclone copy "r2:${R2_BUCKET}/openclaw/" "$CONFIG_DIR/" $RCLONE_FLAGS --exclude='workspace/**' --exclude='skills/**' -v 2>&1 || echo "WARNING: config restore failed with exit code $?"
         echo "Config restored"
     elif rclone ls "r2:${R2_BUCKET}/clawdbot/clawdbot.json" $RCLONE_FLAGS 2>/dev/null | grep -q clawdbot.json; then
         echo "Restoring from legacy R2 backup..."
-        rclone copy "r2:${R2_BUCKET}/clawdbot/" "$CONFIG_DIR/" $RCLONE_FLAGS -v 2>&1 || echo "WARNING: legacy config restore failed with exit code $?"
+        rclone copy "r2:${R2_BUCKET}/clawdbot/" "$CONFIG_DIR/" $RCLONE_FLAGS --exclude='workspace/**' --exclude='skills/**' -v 2>&1 || echo "WARNING: legacy config restore failed with exit code $?"
         if [ -f "$CONFIG_DIR/clawdbot.json" ] && [ ! -f "$CONFIG_FILE" ]; then
             mv "$CONFIG_DIR/clawdbot.json" "$CONFIG_FILE"
         fi
@@ -77,22 +79,51 @@ if r2_configured; then
         echo "No backup found in R2, starting fresh"
     fi
-    # Restore workspace
-    REMOTE_WS_COUNT=$(rclone ls "r2:${R2_BUCKET}/workspace/" $RCLONE_FLAGS 2>/dev/null | wc -l)
-    if [ "$REMOTE_WS_COUNT" -gt 0 ]; then
-        echo "Restoring workspace from R2 ($REMOTE_WS_COUNT files)..."
-        mkdir -p "$WORKSPACE_DIR"
-        rclone copy "r2:${R2_BUCKET}/workspace/" "$WORKSPACE_DIR/" $RCLONE_FLAGS -v 2>&1 || echo "WARNING: workspace restore failed with exit code $?"
-        echo "Workspace restored"
-    fi
-    # Restore skills
-    REMOTE_SK_COUNT=$(rclone ls "r2:${R2_BUCKET}/skills/" $RCLONE_FLAGS 2>/dev/null | wc -l)
-    if [ "$REMOTE_SK_COUNT" -gt 0 ]; then
-        echo "Restoring skills from R2 ($REMOTE_SK_COUNT files)..."
-        mkdir -p "$SKILLS_DIR"
-        rclone copy "r2:${R2_BUCKET}/skills/" "$SKILLS_DIR/" $RCLONE_FLAGS -v 2>&1 || echo "WARNING: skills restore failed with exit code $?"
-        echo "Skills restored"
-    fi
+    # One-time migration + workspace/skills restore run in ONE background
+    # job so they don't block gateway startup. Migration goes first so the
+    # restore below sees data just moved under the openclaw/ prefix.
+    # NOTE(review): the periodic sync loop can still run while this job is
+    # in flight — confirm its pull-before-push order is enough to keep a
+    # push from deleting freshly migrated objects.
+    (
+        # Use rclone cat (not lsf) to check marker — lsf treats file paths as directory prefixes
+        if ! rclone cat "r2:${R2_BUCKET}/openclaw/.migrated-prefixes" $RCLONE_FLAGS 2>/dev/null | grep -q migrated; then
+            echo "Running one-time R2 prefix migration (background)..."
+            MIGRATION_OK=1
+            for LEGACY_PREFIX in workspace skills; do
+                LEGACY_COUNT=$(rclone ls "r2:${R2_BUCKET}/${LEGACY_PREFIX}/" $RCLONE_FLAGS 2>/dev/null | wc -l)
+                [ "$LEGACY_COUNT" -gt 0 ] || continue
+                # Purge a legacy prefix ONLY after its copy succeeded; purging
+                # after a failed copy would destroy the only copy of the data.
+                if rclone copy "r2:${R2_BUCKET}/${LEGACY_PREFIX}/" "r2:${R2_BUCKET}/openclaw/${LEGACY_PREFIX}/" $RCLONE_FLAGS 2>/dev/null; then
+                    rclone purge "r2:${R2_BUCKET}/${LEGACY_PREFIX}/" $RCLONE_FLAGS 2>/dev/null || true
+                else
+                    echo "WARNING: R2 migration of ${LEGACY_PREFIX}/ failed; legacy data kept, will retry on next start"
+                    MIGRATION_OK=0
+                fi
+            done
+            # Write the marker only on full success so a failed run is retried.
+            if [ "$MIGRATION_OK" -eq 1 ] && echo "migrated $(date -Iseconds)" | rclone rcat "r2:${R2_BUCKET}/openclaw/.migrated-prefixes" $RCLONE_FLAGS; then
+                echo "R2 prefix migration complete"
+            fi
+        fi
+
+        REMOTE_WS_COUNT=$(rclone ls "r2:${R2_BUCKET}/openclaw/workspace/" $RCLONE_FLAGS 2>/dev/null | wc -l)
+        if [ "$REMOTE_WS_COUNT" -gt 0 ]; then
+            echo "Restoring workspace from R2 ($REMOTE_WS_COUNT files)..."
+            mkdir -p "$WORKSPACE_DIR"
+            rclone copy "r2:${R2_BUCKET}/openclaw/workspace/" "$WORKSPACE_DIR/" $RCLONE_FLAGS 2>&1 || echo "WARNING: workspace restore failed"
+            echo "Workspace restored"
+        fi
+
+        REMOTE_SK_COUNT=$(rclone ls "r2:${R2_BUCKET}/openclaw/skills/" $RCLONE_FLAGS 2>/dev/null | wc -l)
+        if [ "$REMOTE_SK_COUNT" -gt 0 ]; then
+            echo "Restoring skills from R2 ($REMOTE_SK_COUNT files)..."
+            mkdir -p "$SKILLS_DIR"
+            rclone copy "r2:${R2_BUCKET}/openclaw/skills/" "$SKILLS_DIR/" $RCLONE_FLAGS 2>&1 || echo "WARNING: skills restore failed"
+            echo "Skills restored"
+        fi
+    ) &
+    echo "Workspace/skills restore started in background"
 else
     echo "R2 not configured, starting fresh"
@@ -210,10 +229,10 @@ if (process.env.CF_AI_GATEWAY_MODEL) {
         api: api,
         models: [{ id: modelId, name: modelId, contextWindow: 131072, maxTokens: 8192 }],
     };
-    config.agents = config.agents || {};
-    config.agents.defaults = config.agents.defaults || {};
-    config.agents.defaults.model = { primary: providerName + '/' + modelId };
-    console.log('AI Gateway model override: provider=' + providerName + ' model=' + modelId + ' via ' + baseUrl);
+    // NOTE: config.agents.defaults.model removed — the { primary: '...' } format
+    // is rejected by OpenClaw's strict config validation. The provider is still
+    // registered under config.models.providers so it can be selected in the UI.
+    console.log('AI Gateway provider registered: ' + providerName + ' model=' + modelId + ' via ' + baseUrl);
 } else {
     console.warn('CF_AI_GATEWAY_MODEL set but missing required config (account ID, gateway ID, or API key)');
 }
@@ -260,6 +279,75 @@ if (process.env.SLACK_BOT_TOKEN && process.env.SLACK_APP_TOKEN) {
     };
 }
+// ── Cron: daily research sprint ──
+// Only seed cron config if no jobs exist yet (won't overwrite user edits)
+// NOTE(review): the messages below reference "/data-room/..." (absolute) while
+// the SKILL.md examples save to workspace-relative "data-room/..." — confirm
+// which location is intended.
+const cronPath = '/root/.openclaw/cron/jobs.json';
+const SEARCH_CMD = 'node /root/clawd/skills/cloudflare-browser/scripts/search.js';
+const FETCH_CMD = 'node /root/clawd/skills/cloudflare-browser/scripts/fetch.js';
+const cronJobs = [
+  {
+    id: 'daily-research',
+    name: 'Daily Research Sprint',
+    enabled: true,
+    schedule: { cron: '0 7 * * *', tz: 'America/Los_Angeles' },
+    session: 'isolated',
+    message: [
+      'Daily research sprint. Follow HEARTBEAT.md daily checklist.',
+      'Pick the 2-3 highest-impact open questions from MEMORY.md.',
+      'For each question:',
+      `1. Search: run \`${SEARCH_CMD} "your query"\` to find relevant sources.`,
+      `2. Read: run \`${FETCH_CMD} <url>\` on the best results to get full content.`,
+      '3. Synthesize: write findings with source URLs to /data-room/09-research/.',
+      '4. Update MEMORY.md: change status from OPEN to RESOLVED (or note what is still needed).',
+      'After research, check if data room folders 01-08 can be populated from your findings.',
+      'End by sending a 3-5 bullet Telegram summary of what you accomplished and what is still blocked.',
+    ].join(' '),
+  },
+  {
+    id: 'evening-synthesis',
+    name: 'Evening Synthesis',
+    enabled: true,
+    schedule: { cron: '0 18 * * *', tz: 'America/Los_Angeles' },
+    session: 'isolated',
+    message: [
+      'Evening synthesis. Review all workspace changes made today.',
+      'Cross-reference new findings against the three business plans in AGENTS.md.',
+      'Update any financial assumptions, timeline estimates, or risk assessments.',
+      'If any deliverable in /data-room/10-deliverables/ needs revision based on new data, draft the update.',
+      'Send owner a brief Telegram summary: key findings, decisions needed, and tomorrow priorities.',
+    ].join(' '),
+  },
+];
+// Load the existing cron file. Only a missing file (ENOENT) counts as "no
+// config yet"; any other read/parse failure leaves the file untouched so a
+// user-edited file is never clobbered.
+let cronData = null;
+let cronFileMissing = false;
+try {
+  cronData = JSON.parse(fs.readFileSync(cronPath, 'utf8'));
+} catch (e) {
+  if (e.code === 'ENOENT') {
+    cronFileMissing = true;
+    cronData = { version: 1, jobs: [] };
+  } else {
+    console.warn('Cron config at ' + cronPath + ' is unreadable, leaving it untouched: ' + e.message);
+  }
+}
+// Seed when there are no jobs (missing file, empty list, or no jobs array).
+if (cronData && typeof cronData === 'object' && !Array.isArray(cronData) && (!Array.isArray(cronData.jobs) || cronData.jobs.length === 0)) {
+  cronData.jobs = cronJobs;
+  fs.mkdirSync('/root/.openclaw/cron', { recursive: true });
+  fs.writeFileSync(cronPath, JSON.stringify(cronData, null, 2));
+  console.log('Cron jobs ' + (cronFileMissing ? 'created' : 'configured') + ': daily-research (7am PT), evening-synthesis (6pm PT)');
+}
+
+// NOTE: config.cron, config.agents.defaults.model, and
+// config.agents.defaults.workspace removed — OpenClaw's strict config
+// validation rejects these fields, preventing gateway startup.
+
 fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
 console.log('Configuration patched successfully');
 EOFPATCH
@@ -277,6 +354,20 @@ if r2_configured; then
         while true; do
             sleep 30
+            # ── PULL: Merge externally-uploaded files from R2 into container ──
+            # Runs EVERY cycle before push. rclone copy is additive (no deletes)
+            # so it only downloads files that are in R2 but not local.
+            # This ensures files uploaded via wrangler/API survive the push sync.
+ if [ -d "$WORKSPACE_DIR" ]; then + rclone copy "r2:${R2_BUCKET}/openclaw/workspace/" "$WORKSPACE_DIR/" \ + $RCLONE_FLAGS --exclude='skills/**' --exclude='.git/**' --exclude='node_modules/**' 2>> "$LOGFILE" + fi + if [ -d "$SKILLS_DIR" ]; then + rclone copy "r2:${R2_BUCKET}/openclaw/skills/" "$SKILLS_DIR/" \ + $RCLONE_FLAGS 2>> "$LOGFILE" + fi + + # ── PUSH: Upload local changes to R2 ── CHANGED=/tmp/.changed-files { find "$CONFIG_DIR" -newer "$MARKER" -type f -printf '%P\n' 2>/dev/null @@ -291,13 +382,13 @@ if r2_configured; then if [ "$COUNT" -gt 0 ]; then echo "[sync] Uploading changes ($COUNT files) at $(date)" >> "$LOGFILE" rclone sync "$CONFIG_DIR/" "r2:${R2_BUCKET}/openclaw/" \ - $RCLONE_FLAGS --exclude='*.lock' --exclude='*.log' --exclude='*.tmp' --exclude='.git/**' 2>> "$LOGFILE" + $RCLONE_FLAGS --exclude='*.lock' --exclude='*.log' --exclude='*.tmp' --exclude='.git/**' --exclude='workspace/**' --exclude='skills/**' 2>> "$LOGFILE" if [ -d "$WORKSPACE_DIR" ]; then - rclone sync "$WORKSPACE_DIR/" "r2:${R2_BUCKET}/workspace/" \ + rclone sync "$WORKSPACE_DIR/" "r2:${R2_BUCKET}/openclaw/workspace/" \ $RCLONE_FLAGS --exclude='skills/**' --exclude='.git/**' --exclude='node_modules/**' 2>> "$LOGFILE" fi if [ -d "$SKILLS_DIR" ]; then - rclone sync "$SKILLS_DIR/" "r2:${R2_BUCKET}/skills/" \ + rclone sync "$SKILLS_DIR/" "r2:${R2_BUCKET}/openclaw/skills/" \ $RCLONE_FLAGS 2>> "$LOGFILE" fi date -Iseconds > "$LAST_SYNC_FILE" @@ -314,6 +405,8 @@ fi # ============================================================ echo "Starting OpenClaw Gateway..." echo "Gateway will be available on port 18789" +echo "--- Config top-level keys ---" +node -e "const c=JSON.parse(require('fs').readFileSync('$CONFIG_FILE','utf8'));console.log(Object.keys(c).join(', '))" 2>/dev/null || echo "(could not read config)" rm -f /tmp/openclaw-gateway.lock 2>/dev/null || true rm -f "$CONFIG_DIR/gateway.lock" 2>/dev/null || true