diff --git a/.github/workflows/ocr-review.yml b/.github/workflows/ocr-review.yml
index 6c0806d2..1933c0fe 100644
--- a/.github/workflows/ocr-review.yml
+++ b/.github/workflows/ocr-review.yml
@@ -23,8 +23,9 @@ concurrency:
 
 on:
   # Use pull_request_target instead of pull_request so that secrets are
-  # available even for PRs from forks. This is safe because OCR only reads
-  # the diff and does not execute any code from the PR.
+  # available even for PRs from forks. Security: no code is checked out;
+  # file contents are fetched via the GitHub REST API and assembled into
+  # a synthetic repo for diff computation only.
   pull_request_target:
     types: [opened]
 
@@ -39,17 +40,117 @@ jobs:
       image: node:20
     if: github.event_name == 'pull_request_target'
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
+      - name: Download base repository snapshot
+        run: |
+          mkdir -p /tmp/review-repo
+          # -f: fail on HTTP errors instead of piping an error body into tar.
+          curl -fsSL \
+            -H "Authorization: Bearer ${{ github.token }}" \
+            "https://api.github.com/repos/${{ github.repository }}/tarball/${{ github.event.pull_request.base.sha }}" \
+            | tar xz --strip-components=1 -C /tmp/review-repo
+          cd /tmp/review-repo
+          git init
+          git config user.email "ocr@ci"
+          git config user.name "OCR CI"
+          git add -A
+          git commit -m "base"
+
+      - name: Apply PR changes to review repository
+        uses: actions/github-script@v7
         with:
-          fetch-depth: 0 # Full history needed for merge-base diff
-          ref: ${{ github.event.pull_request.head.sha }}
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+            const { execSync } = require('child_process');
 
-      - name: Mark repository as safe directory
-        run: git config --global --add safe.directory '*'
+            const repoDir = '/tmp/review-repo';
+            const run = (cmd) => execSync(cmd, { cwd: repoDir, encoding: 'utf8' });
+
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+            const prNumber = context.issue.number;
+            const headSha = context.payload.pull_request.head.sha;
+            // head.repo is null if the fork was deleted; fall back to the base repo (assumes the head SHA is served there via the PR ref - confirm).
+            const headOwner = context.payload.pull_request.head.repo?.owner.login ?? owner;
+            const headRepo = context.payload.pull_request.head.repo?.name ?? repo;
+
+            // Collect every changed file, 100 per page, until a short page is returned.
+            const files = [];
+            for (let page = 1; ; page++) {
+              const resp = await github.rest.pulls.listFiles({
+                owner, repo, pull_number: prNumber, per_page: 100, page
+              });
+              files.push(...resp.data);
+              if (resp.data.length < 100) break;
+            }
+            console.log(`PR has ${files.length} changed file(s)`);
 
-      - name: Fetch PR head ref (ensures fork commits are available)
-        run: git fetch origin pull/${{ github.event.pull_request.number }}/head
+            // Resolve rel under base; returns null if the result escapes base or is base itself.
+            function safePath(base, rel) {
+              const resolved = path.resolve(base, rel);
+              if (!resolved.startsWith(base + path.sep)) return null;
+              return resolved;
+            }
+
+            // Remove a file, symlink, or directory; a missing path is not an error.
+            const remove = (target) => fs.rmSync(target, { recursive: true, force: true });
+
+            // Fetch a file's bytes at ref; returns null when the content cannot be retrieved.
+            async function fetchContent(fileOwner, fileRepo, filePath, ref) {
+              try {
+                const resp = await github.rest.repos.getContent({
+                  owner: fileOwner, repo: fileRepo, path: filePath, ref
+                });
+                if (resp.data.encoding === 'base64' && resp.data.content) {
+                  return Buffer.from(resp.data.content, 'base64');
+                }
+                if (resp.data.download_url) {
+                  const raw = await fetch(resp.data.download_url);
+                  if (!raw.ok) return null;
+                  return Buffer.from(await raw.arrayBuffer());
+                }
+                if (resp.data.content) {
+                  return Buffer.from(resp.data.content, 'utf8');
+                }
+                return null;
+              } catch (e) {
+                if (e.status === 404 || e.status === 403) return null;
+                throw e;
+              }
+            }
+
+            for (const file of files) {
+              if (file.status === 'removed') {
+                const fullPath = safePath(repoDir, file.filename);
+                if (fullPath) remove(fullPath);
+                continue;
+              }
+              if (file.status === 'renamed' && file.previous_filename) {
+                const oldPath = safePath(repoDir, file.previous_filename);
+                if (oldPath) remove(oldPath);
+              }
+              const fullPath = safePath(repoDir, file.filename);
+              if (!fullPath) {
+                console.log(`Skipping ${file.filename}: path traversal detected`);
+                continue;
+              }
+              const content = await fetchContent(headOwner, headRepo, file.filename, headSha);
+              if (content === null) {
+                console.log(`Skipping ${file.filename}: could not fetch at head`);
+                continue;
+              }
+              fs.mkdirSync(path.dirname(fullPath), { recursive: true });
+              // Replace a snapshot symlink or directory instead of writing through it; regular files are overwritten in place.
+              const existing = fs.lstatSync(fullPath, { throwIfNoEntry: false });
+              if (existing && !existing.isFile()) remove(fullPath);
+              fs.writeFileSync(fullPath, content);
+            }
+            run('git add -A');
+            run('git commit --allow-empty -m "head"');
+
+            console.log('Review repo ready');
+            // execSync captures stdout, so the log has to be printed explicitly.
+            console.log(run('git log --oneline'));
 
       - name: Install OpenCodeReview
         run: npm install -g @alibaba-group/open-code-review
@@ -66,15 +167,12 @@ jobs:
       - name: Run OpenCodeReview
         id: review
         run: |
-          BASE_REF="${{ github.event.pull_request.base.ref }}"
-          HEAD_SHA="${{ github.event.pull_request.head.sha }}"
-
-          echo "Reviewing PR: ${HEAD_SHA} against origin/${BASE_REF}"
+          echo "Reviewing PR in synthetic repo"
 
-          # Run OCR in range mode with JSON output
+          cd /tmp/review-repo
           ocr review \
-            --from "origin/${BASE_REF}" \
-            --to "${HEAD_SHA}" \
+            --from HEAD~1 \
+            --to HEAD \
             --format json \
             > /tmp/ocr-result.json 2>/tmp/ocr-stderr.log || true
 