diff --git a/.github/workflows/ocr-review.yml b/.github/workflows/ocr-review.yml index 6c0806d2..4ffcc22d 100644 --- a/.github/workflows/ocr-review.yml +++ b/.github/workflows/ocr-review.yml @@ -1,10 +1,11 @@ -# OpenCodeReview - GitHub Actions PR Auto-Review Demo +# OpenCodeReview - GitHub Actions PR Auto-Review Pipeline # # This workflow automatically reviews pull requests using OpenCodeReview # and posts review comments directly on the PR. # # Triggers: # - PR opened (uses pull_request_target for fork secret access) +# - Comment on PR containing '/open-code-review' or '@open-code-review' # # Required secrets: # OCR_LLM_URL - LLM API endpoint (e.g., https://api.openai.com/v1/chat/completions) @@ -12,8 +13,34 @@ # # Optional secrets: # OCR_LLM_MODEL - Model name (default: gpt-4o) +# OCR_LLM_USE_ANTHROPIC - Set to 'true' if using Anthropic Claude models +# +# Optional variables (for retry/delay tuning): +# The retry strategy follows GitHub's documented guidance for REST API rate limits: +# https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api +# - Primary rate limit exhausted (x-ratelimit-remaining=0): wait until x-ratelimit-reset. +# - Secondary rate limit with a retry-after header: wait exactly that long. +# - Secondary rate limit with NO header: wait at least one minute, then use +# exponential backoff on continued failures. +# +# OCR_RETRY_BASE_DELAY - Base delay (ms) for exponential backoff when no retry +# header is present (default: 60000, per GitHub's +# "at least one minute" recommendation for secondary limits). +# OCR_RETRY_MAX_DELAY - Maximum delay (ms) cap applied to EVERY computed wait, +# including retry-after and x-ratelimit-reset, so a far-future +# reset cannot stall the job past its timeout (default: 300000 = 5 min). +# OCR_MAX_RETRIES - Max retry attempts per comment when rate-limited (default: 3). +# OCR_SUCCESS_DELAY - Delay (ms) between successful comment posts to pace requests (default: 2000). +# OCR_FAILURE_DELAY - Delay (ms) after a non-retryable failure to pace subsequent requests (default: 1000). +# OCR_LOW_REMAINING_THRESHOLD - When x-ratelimit-remaining is at or below this value, +# proactively increase request spacing to avoid hitting the limit +# (default: 3; GitHub best practice is to watch the header and slow down). +# OCR_LOW_REMAINING_SPACING - Request spacing (ms) used when remaining quota is low +# (default: 10000 = 10s). # # Note: GITHUB_TOKEN is automatically provided by GitHub Actions. +# Note: The workflow also configures llm.extra_body to '{"thinking": {"type": "disabled"}}' +# to disable thinking mode for compatibility with various LLM providers. name: OpenCodeReview PR Review @@ -182,7 +209,7 @@ jobs: const failedComments = []; try { - await github.rest.pulls.createReview({ + const batchRes = await github.rest.pulls.createReview({ owner: context.repo.owner, repo: context.repo.repo, pull_number: prNumber, @@ -193,28 +220,85 @@ jobs: }); successCount = reviewComments.length; console.log(`Successfully posted review with ${successCount} inline comments (${commentsWithoutLine.length} in summary)`); + logRateLimitQuota(batchRes, 'after batch createReview'); } catch (e) { console.log('Failed to post review with inline comments:', e.message); - console.log('Falling back to posting comments individually...'); + console.log('Falling back to posting comments individually with rate-limit-aware retry...'); - // Fallback: post comments one by one + // Fallback: post comments one by one with delay to avoid secondary rate limits. + // GitHub enforces ~80 content-generating requests per minute; spacing calls + // helps stay under that threshold. Retry/wait durations are derived from the + // rate-limit response headers per GitHub's documented strategy. + const MAX_RETRIES = parseInt(process.env.OCR_MAX_RETRIES, 10) || 3; + const SUCCESS_DELAY = parseInt(process.env.OCR_SUCCESS_DELAY, 10) || 2000; // delay after successful post + const FAILURE_DELAY = parseInt(process.env.OCR_FAILURE_DELAY, 10) || 1000; // delay after non-retryable failure + const LOW_REMAINING_THRESHOLD = parseInt(process.env.OCR_LOW_REMAINING_THRESHOLD, 10) || 3; + const LOW_REMAINING_SPACING = parseInt(process.env.OCR_LOW_REMAINING_SPACING, 10) || 10000; + + // If the batch itself was rate-limited, honor its rate-limit headers + // (retry-after / x-ratelimit-reset) before retrying per-comment, + // otherwise the first per-comment call re-hits the same wall immediately. + const batchRetry = computeRetryDelayMs(e, 0); + if (batchRetry != null) { + const secs = (batchRetry.delayMs / 1000).toFixed(1); + console.log( + `Batch createReview was rate-limited (HTTP ${e.status}). ` + + `Cooling down ${secs}s via '${batchRetry.source}' (${batchRetry.detail}) before per-comment retry.` + ); + await sleep(batchRetry.delayMs); + } + for (const { comment, reviewComment } of reviewComments) { - try { - await github.rest.pulls.createReview({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: prNumber, - commit_id: commitSha, - body: '', - event: 'COMMENT', - comments: [reviewComment] - }); - successCount++; - console.log(`Successfully posted comment for ${reviewComment.path}`); - } catch (innerE) { - failedCount++; - failedComments.push({ comment, error: innerE.message }); - console.log(`Failed to post comment for ${reviewComment.path}: ${innerE.message}`); + let posted = false; + for (let attempt = 0; attempt <= MAX_RETRIES && !posted; attempt++) { + try { + const res = await github.rest.pulls.createReview({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + commit_id: commitSha, + body: '', + event: 'COMMENT', + comments: [reviewComment] + }); + successCount++; + posted = true; + console.log(`Successfully posted comment for ${reviewComment.path}`); + // Proactive throttle: if remaining quota is low, slow down to + // avoid hitting the limit (GitHub best practice: watch the header). + const remaining = logRateLimitQuota(res, `after ${reviewComment.path}`); + const lowQuota = remaining != null && remaining <= LOW_REMAINING_THRESHOLD; + if (lowQuota) { + console.log(`[rate-limit] quota low (remaining=${remaining} <= ${LOW_REMAINING_THRESHOLD}); increasing spacing to ${LOW_REMAINING_SPACING}ms.`); + await sleep(LOW_REMAINING_SPACING); + } else { + await sleep(SUCCESS_DELAY); + } + } catch (innerE) { + // Decide whether to retry and how long to wait, based on GitHub's + // rate-limit documentation (retry-after / x-ratelimit-* headers). + const retryInfo = computeRetryDelayMs(innerE, attempt); + const willRetry = retryInfo != null && attempt < MAX_RETRIES; + if (willRetry) { + const secs = (retryInfo.delayMs / 1000).toFixed(1); + console.log( + `Rate-limited/transient error on ${reviewComment.path} ` + + `(HTTP ${innerE.status}, attempt ${attempt + 1}/${MAX_RETRIES}). ` + + `Waiting ${secs}s via '${retryInfo.source}' (${retryInfo.detail}). ` + + `Error: ${innerE.message}` + ); + await sleep(retryInfo.delayMs); + } else { + failedCount++; + failedComments.push({ comment, error: innerE.message }); + const reason = retryInfo == null ? 'non-retryable error' : 'rate-limit retries exhausted'; + console.log(`Failed to post comment for ${reviewComment.path} (${reason}, HTTP ${innerE.status || 'n/a'}): ${innerE.message}`); + // After exhausting retries use the success-style pace delay; + // for other errors use the shorter failure pace delay. + await sleep(retryInfo == null ? FAILURE_DELAY : SUCCESS_DELAY); + break; + } + } } } @@ -244,6 +328,115 @@ jobs: }); } + function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } + + // Case-insensitive header lookup. Octokit normalizes response headers to + // lowercase, but this defensive check also handles original casing so that + // quota logging and retry delay computation never silently miss a header. + function getHeader(headers, name) { + const v = headers[name] != null ? headers[name] : headers[name.toLowerCase()]; + return v != null ? String(v).trim() : undefined; + } + + // Decide whether an error is worth retrying and, if so, how long to wait. + // Implements GitHub's documented rate-limit retry strategy using the + // response headers (retry-after, x-ratelimit-remaining, x-ratelimit-reset). + // Returns { delayMs, source, detail } when retryable, or null otherwise. + // See: https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api + function computeRetryDelayMs(error, attempt) { + if (!error) return null; + const status = error.status; + const message = String(error.message || ''); + const isRateLimit = status === 429 || (status === 403 && /rate limit|abuse|secondary/i.test(message)); + const isTransient = (status >= 500 && status < 600) || status === 408; + if (!isRateLimit && !isTransient) return null; + + const headers = ((error.response || {}).headers) || {}; + const header = (name) => getHeader(headers, name); + const nowSec = Math.floor(Date.now() / 1000); + + // The absolute maximum wait for any single retry. Header-derived waits + // (retry-after / x-ratelimit-reset) are GitHub's recommended durations, + // but capping them prevents a far-future reset from stalling the CI job + // past its timeout. When we cap, the next retry may re-hit the limit. + const cap = parseInt(process.env.OCR_RETRY_MAX_DELAY, 10) || 300000; + const base = parseInt(process.env.OCR_RETRY_BASE_DELAY, 10) || 60000; + + // { rawMs, source, detail } describing the recommended wait before cap. + let info = null; + + if (isRateLimit) { + // (1) Honor "retry-after" when present (seconds, or an HTTP-date). + const retryAfter = header('retry-after'); + if (retryAfter) { + const secs = Number(retryAfter); + if (!isNaN(secs) && secs >= 0) { + info = { rawMs: secs * 1000, source: 'retry-after', detail: `${secs}s (from header)` }; + } else { + const dateMs = Date.parse(retryAfter); + if (!isNaN(dateMs)) { + info = { rawMs: Math.max(0, dateMs - Date.now()), source: 'retry-after (HTTP-date)', detail: retryAfter }; + } + } + } + + // (2) Primary limit exhausted (x-ratelimit-remaining=0): wait until reset. + if (!info) { + const remaining = header('x-ratelimit-remaining'); + const reset = header('x-ratelimit-reset'); + if (reset != null && Number(remaining) === 0) { + const rawMs = Math.max(0, Number(reset) - nowSec) * 1000; + info = { rawMs, source: 'x-ratelimit-reset', detail: `remaining=0, reset epoch=${reset} (in ${Math.ceil(rawMs / 1000)}s)` }; + } + } + + // (3) Secondary limit with no retry hint: docs say wait at least one + // minute, then increase exponentially between retries. + if (!info) { + const backoff = Math.min(base * Math.pow(2, attempt), cap); + const jitter = Math.floor(Math.random() * 1000); + info = { rawMs: backoff + jitter, source: 'exponential-backoff', detail: `base=${base}ms*2^${attempt} (cap ${cap}ms) +${jitter}ms jitter` }; + } + } else { + // Transient server error (5xx / 408): back off without the 60s floor. + // Use a shorter base than the rate-limit path: server hiccups are + // typically short-lived, so a 2s initial wait (doubling per retry) + // is sufficient and avoids stalling the CI job unnecessarily. + const transientBase = 2000; + const backoff = Math.min(transientBase * Math.pow(2, attempt), cap); + const jitter = Math.floor(Math.random() * 1000); + info = { rawMs: backoff + jitter, source: 'transient-backoff', detail: `base=${transientBase}ms*2^${attempt} (cap ${cap}ms) +${jitter}ms jitter (HTTP ${status})` }; + } + + // Apply the universal cap to header-derived waits too. + const delayMs = Math.min(info.rawMs, cap); + if (delayMs < info.rawMs) { + info.detail += ` [CAPPED to ${cap}ms; GitHub recommended ${Math.ceil(info.rawMs / 1000)}s]`; + } + return { delayMs, source: info.source, detail: info.detail }; + } + + // Best-effort logging of remaining rate-limit quota from a successful response. + // Returns the parsed x-ratelimit-remaining value (or null) for proactive throttling. + function logRateLimitQuota(response, tag) { + try { + const h = (response && response.headers) || {}; + const header = (name) => getHeader(h, name); + const remaining = header('x-ratelimit-remaining'); + const limit = header('x-ratelimit-limit'); + const reset = header('x-ratelimit-reset'); + if (remaining != null) { + console.log( + `[rate-limit] ${tag}: remaining=${remaining}/${limit != null ? limit : '?'}` + + (reset != null ? `, reset epoch=${reset}` : '') + ); + } + return remaining != null ? Number(remaining) : null; + } catch (_) { return null; } + } + function formatComment(comment) { let body = comment.content || '';