diff --git a/gittensor/validator/issue_discovery/repo_scan.py b/gittensor/validator/issue_discovery/repo_scan.py
index 2ac232de..4c846655 100644
--- a/gittensor/validator/issue_discovery/repo_scan.py
+++ b/gittensor/validator/issue_discovery/repo_scan.py
@@ -11,6 +11,7 @@
 """
 
 import asyncio
+import time
 from datetime import datetime, timedelta, timezone
 from typing import Dict, List, Optional, Set, Tuple
 
@@ -201,40 +202,74 @@ async def _lookup(issue_raw: dict) -> Tuple[dict, Optional[int], Optional[int]]:
 
 
 def _fetch_closed_issues(repo_name: str, since: str, token: str) -> List[dict]:
-    """Fetch closed issues from a repo via REST API with pagination."""
+    """Fetch closed issues from a repo via REST API with pagination and retry.
+
+    Retries transient server errors (HTTP 502/503/504) and connection failures
+    with exponential backoff, matching the retry pattern used by other API
+    functions in the codebase (e.g. get_pull_request_file_changes, execute_graphql_query).
+    """
     headers = {'Authorization': f'token {token}', 'Accept': 'application/vnd.github.v3+json'}
 
     all_issues: List[dict] = []
     page = 1
+    max_retries = 3
 
     while True:
-        try:
-            response = requests.get(
-                f'{BASE_GITHUB_API_URL}/repos/{repo_name}/issues',
-                params={'state': 'closed', 'since': since, 'per_page': 100, 'page': page},
-                headers=headers,
-                timeout=30,
-            )
-            if response.status_code in (404, 422):
-                bt.logging.debug(f'Issue scan {repo_name} page {page}: HTTP {response.status_code}')
-                break
-            if response.status_code != 200:
-                bt.logging.warning(f'Issue scan {repo_name} page {page}: HTTP {response.status_code}')
-                break
-
-            issues = response.json()
-            if not issues:
-                break
-
-            all_issues.extend(issues)
-            page += 1
-
-            # Safety: don't paginate forever
-            if page > 100:
-                bt.logging.warning(f'Issue scan {repo_name}: hit 100-page limit')
-                break
-
-        except requests.RequestException as e:
-            bt.logging.warning(f'Issue scan {repo_name} page {page}: {e}')
+        last_error: Optional[str] = None
+        for attempt in range(max_retries):
+            try:
+                response = requests.get(
+                    f'{BASE_GITHUB_API_URL}/repos/{repo_name}/issues',
+                    params={'state': 'closed', 'since': since, 'per_page': 100, 'page': page},
+                    headers=headers,
+                    timeout=30,
+                )
+
+                if response.status_code == 200:
+                    issues = response.json()
+                    if not issues:
+                        return all_issues
+                    all_issues.extend(issues)
+                    last_error = None
+                    break  # Success, move to next page
+
+                if response.status_code in (404, 422):
+                    bt.logging.debug(f'Issue scan {repo_name} page {page}: HTTP {response.status_code}')
+                    return all_issues
+
+                # Non-success status: only 502/503/504 are retried; anything else aborts below
+                last_error = f'HTTP {response.status_code}'
+                if response.status_code in (502, 503, 504) and attempt < max_retries - 1:
+                    backoff = min(5 * (2**attempt), 30)
+                    bt.logging.warning(
+                        f'Issue scan {repo_name} page {page}: {last_error} '
+                        f'(attempt {attempt + 1}/{max_retries}), retrying in {backoff}s...'
+                    )
+                    time.sleep(backoff)
+                    continue
+
+                bt.logging.warning(f'Issue scan {repo_name} page {page}: {last_error}')
+                return all_issues
+
+            except requests.RequestException as e:
+                last_error = str(e)
+                if attempt < max_retries - 1:
+                    backoff = min(5 * (2**attempt), 30)
+                    bt.logging.warning(
+                        f'Issue scan {repo_name} page {page}: {e} '
+                        f'(attempt {attempt + 1}/{max_retries}), retrying in {backoff}s...'
+                    )
+                    time.sleep(backoff)
+                    continue
+                bt.logging.warning(f'Issue scan {repo_name} page {page}: {last_error}')
+                return all_issues
+        else:
+            # Defensive fallback: every exhaustion path above returns or breaks first
+            bt.logging.warning(f'Issue scan {repo_name} page {page} failed after {max_retries} attempts: {last_error}')
+            return all_issues
+
+        page += 1
+        if page > 100:
+            bt.logging.warning(f'Issue scan {repo_name}: hit 100-page limit')
             break
     return all_issues
diff --git a/tests/validator/test_repo_scan_retry.py b/tests/validator/test_repo_scan_retry.py
new file mode 100644
index 00000000..b938e041
--- /dev/null
+++ b/tests/validator/test_repo_scan_retry.py
@@ -0,0 +1,133 @@
+# The MIT License (MIT)
+# Copyright © 2025 Entrius
+
+"""Unit tests for _fetch_closed_issues retry logic in repo_scan module."""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+repo_scan = pytest.importorskip('gittensor.validator.issue_discovery.repo_scan', reason='Requires gittensor package')
+_fetch_closed_issues = repo_scan._fetch_closed_issues
+
+
+def _mock_response(status_code: int, json_data=None):
+    """Create a mock requests.Response."""
+    resp = Mock()
+    resp.status_code = status_code
+    resp.json.return_value = json_data if json_data is not None else []
+    return resp
+
+
+class TestFetchClosedIssuesRetry:
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_success_on_first_attempt(self, mock_get, mock_sleep):
+        issues = [{'number': 1, 'title': 'issue1'}]
+        mock_get.side_effect = [
+            _mock_response(200, issues),
+            _mock_response(200, []),  # empty = end pagination
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == issues
+        mock_sleep.assert_not_called()
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_retry_on_502_then_success(self, mock_get, mock_sleep):
+        issues = [{'number': 1, 'title': 'issue1'}]
+        mock_get.side_effect = [
+            _mock_response(502),
+            _mock_response(200, issues),
+            _mock_response(200, []),
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == issues
+        assert mock_sleep.call_count == 1
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_retry_on_503_then_success(self, mock_get, mock_sleep):
+        issues = [{'number': 2, 'title': 'issue2'}]
+        mock_get.side_effect = [
+            _mock_response(503),
+            _mock_response(503),
+            _mock_response(200, issues),
+            _mock_response(200, []),
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == issues
+        assert mock_sleep.call_count == 2
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_all_retries_exhausted_returns_partial(self, mock_get, mock_sleep):
+        mock_get.side_effect = [
+            _mock_response(502),
+            _mock_response(502),
+            _mock_response(502),
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == []
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_404_returns_immediately_no_retry(self, mock_get, mock_sleep):
+        mock_get.return_value = _mock_response(404)
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == []
+        assert mock_get.call_count == 1
+        mock_sleep.assert_not_called()
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_422_returns_immediately_no_retry(self, mock_get, mock_sleep):
+        mock_get.return_value = _mock_response(422)
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == []
+        assert mock_get.call_count == 1
+        mock_sleep.assert_not_called()
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_connection_error_retries(self, mock_get, mock_sleep):
+        import requests as req
+
+        issues = [{'number': 3, 'title': 'issue3'}]
+        mock_get.side_effect = [
+            req.ConnectionError('connection reset'),
+            _mock_response(200, issues),
+            _mock_response(200, []),
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == issues
+        assert mock_sleep.call_count == 1
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_exponential_backoff_timing(self, mock_get, mock_sleep):
+        mock_get.side_effect = [
+            _mock_response(502),
+            _mock_response(502),
+            _mock_response(502),
+        ]
+        _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        # Backoff: 5*2^0=5, 5*2^1=10 (third attempt is last, no sleep after)
+        assert mock_sleep.call_count == 2
+        mock_sleep.assert_any_call(5)
+        mock_sleep.assert_any_call(10)
+
+    @patch('time.sleep')
+    @patch('requests.get')
+    def test_pagination_with_retry_on_second_page(self, mock_get, mock_sleep):
+        page1 = [{'number': 1}]
+        page2 = [{'number': 2}]
+        mock_get.side_effect = [
+            _mock_response(200, page1),  # page 1 OK
+            _mock_response(502),  # page 2 fail
+            _mock_response(200, page2),  # page 2 retry OK
+            _mock_response(200, []),  # page 3 empty = done
+        ]
+        result = _fetch_closed_issues('owner/repo', '2025-01-01T00:00:00Z', 'token')
+        assert result == page1 + page2
+        assert mock_sleep.call_count == 1