From 19ff951eae25c2d96f6b1f300c30023deca4866a Mon Sep 17 00:00:00 2001 From: Sangam Paudel Date: Fri, 17 Oct 2025 22:39:29 +0545 Subject: [PATCH 1/3] Add dry-run Feature --- forklet/__main__.py | 11 +++- forklet/core/orchestrator.py | 33 +++++++++--- forklet/interfaces/cli.py | 4 +- forklet/models/download.py | 3 ++ tests/core/test_orchestrator_dry_run.py | 71 +++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 tests/core/test_orchestrator_dry_run.py diff --git a/forklet/__main__.py b/forklet/__main__.py index bc86f73..1134ed6 100644 --- a/forklet/__main__.py +++ b/forklet/__main__.py @@ -47,6 +47,7 @@ def cli(ctx, verbose: bool, token: Optional[str]): help='Download strategy') @click.option('--concurrent', '-c', default=5, help='Concurrent downloads') @click.option('--overwrite', '-f', is_flag=True, help='Overwrite existing files') +@click.option('--dry-run', '-n', is_flag=True, help='Preview files to download without writing') @click.pass_context def download( ctx, @@ -65,7 +66,8 @@ def download( strategy: str, concurrent: int, overwrite: bool, - no_progress: bool + no_progress: bool, + dry_run: bool ): """ Download files from a GitHub repository. 
@@ -104,7 +106,8 @@ async def run_download(): token = token, concurrent = concurrent, overwrite = overwrite, - progress = not no_progress + progress = not no_progress, + dry_run = dry_run ) asyncio.run(run_download()) @@ -192,3 +195,7 @@ def version(): #### MAIN ENTRYPOINT FOR THE FORKLET CLI def main(): cli() + + +if __name__ == "__main__": + main() diff --git a/forklet/core/orchestrator.py b/forklet/core/orchestrator.py index 7040928..4314357 100644 --- a/forklet/core/orchestrator.py +++ b/forklet/core/orchestrator.py @@ -144,12 +144,8 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: f"Filtered {filter_result.filtered_files}/{filter_result.total_files} " "files for download" ) - - # Prepare destination - if request.create_destination: - await self.download_service.ensure_directory(request.destination) - - # Create download result and set as current + + # Create download result and set as current (so control operations can act) result = DownloadResult( request=request, status=DownloadStatus.IN_PROGRESS, @@ -157,6 +153,31 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: started_at=datetime.now() ) self._current_result = result + + # If dry-run is explicitly requested, prepare a summary and return without writing files + if getattr(request, 'dry_run', None) is True: + # Determine which files would be skipped due to existing local files + skipped = [] + for f in target_files: + if request.preserve_structure: + target_path = request.destination / f.path + else: + target_path = request.destination / Path(f.path).name + if target_path.exists() and not request.overwrite_existing: + skipped.append(f.path) + + # Update and return the result summarizing what would happen + result.status = DownloadStatus.COMPLETED + result.downloaded_files = [] + result.skipped_files = skipped + result.failed_files = {} + result.completed_at = datetime.now() + logger.info(f"Dry-run: {len(target_files)} files matched, 
{len(skipped)} would be skipped") + return result + + # Prepare destination + if request.create_destination: + await self.download_service.ensure_directory(request.destination) # Reset state tracking self._completed_files.clear() diff --git a/forklet/interfaces/cli.py b/forklet/interfaces/cli.py index 270c776..37cbf89 100644 --- a/forklet/interfaces/cli.py +++ b/forklet/interfaces/cli.py @@ -126,7 +126,8 @@ async def execute_download( token: Optional[str], concurrent: int, overwrite: bool, - progress: bool = True + progress: bool = True, + dry_run: bool = False ) -> None: """ Execute the download operation. @@ -168,6 +169,7 @@ async def execute_download( max_concurrent_downloads = concurrent, overwrite_existing = overwrite, show_progress_bars = progress + ,dry_run = dry_run ) # Execute download diff --git a/forklet/models/download.py b/forklet/models/download.py index e628d7f..44a0b83 100644 --- a/forklet/models/download.py +++ b/forklet/models/download.py @@ -106,6 +106,9 @@ class DownloadRequest: # Authentication token: Optional[str] = None + # Dry-run preview mode (do not write files) + dry_run: bool = False + # Metadata request_id: str = field(default_factory=lambda: f"req_{datetime.now().strftime('%Y%m%d_%H%M%S')}") created_at: datetime = field(default_factory=datetime.now) diff --git a/tests/core/test_orchestrator_dry_run.py b/tests/core/test_orchestrator_dry_run.py new file mode 100644 index 0000000..2a70bd9 --- /dev/null +++ b/tests/core/test_orchestrator_dry_run.py @@ -0,0 +1,71 @@ +import asyncio +import pytest +from pathlib import Path +from datetime import datetime + +from forklet.core.orchestrator import DownloadOrchestrator +from forklet.services.github_api import GitHubAPIService +from forklet.services.download import DownloadService +from forklet.infrastructure.retry_manager import RetryManager +from forklet.infrastructure.rate_limiter import RateLimiter +from forklet.models.github import GitHubFile +from forklet.models.download import 
DownloadRequest, FilterCriteria +from forklet.models.github import RepositoryInfo, GitReference, RepositoryType + + +@pytest.mark.asyncio +async def test_orchestrator_dry_run(tmp_path, monkeypatch): + # Arrange: create mock files returned by GitHub API + files = [ + GitHubFile(path="src/main.py", type="blob", size=100, download_url="https://api.github.com/file1"), + GitHubFile(path="README.md", type="blob", size=50, download_url="https://api.github.com/file2"), + ] + + async def mock_get_repository_tree(owner, repo, ref): + return files + + # Setup services + rate_limiter = RateLimiter() + retry_manager = RetryManager() + github_service = GitHubAPIService(rate_limiter, retry_manager) + download_service = DownloadService(retry_manager) + + # Monkeypatch the github_service.get_repository_tree to return our files + monkeypatch.setattr(github_service, 'get_repository_tree', mock_get_repository_tree) + + orchestrator = DownloadOrchestrator(github_service, download_service) + + # Create a fake repository and ref + repo = RepositoryInfo( + owner='test', name='repo', full_name='test/repo', url='https://github.com/test/repo', + default_branch='main', repo_type=RepositoryType.PUBLIC, size=1, + is_private=False, is_fork=False, created_at=datetime.now(), updated_at=datetime.now() + ) + ref = GitReference(name='main', ref_type='branch', sha='abc') + + # Create destination and create one existing file to test skipped detection + dest = tmp_path / "out" + dest.mkdir() + existing = dest / "README.md" + existing.write_text("existing") + + request = DownloadRequest( + repository=repo, + git_ref=ref, + destination=dest, + strategy=None, + filters=FilterCriteria(), + dry_run=True + ) + + # Act + result = await orchestrator.execute_download(request) + + # Assert + assert result is not None + assert result.progress.total_files == 2 + assert result.progress.total_bytes == 150 + # No files should be downloaded in dry-run + assert result.downloaded_files == [] + # README.md should be 
reported as skipped + assert "README.md" in result.skipped_files From f96016b0e25cccd5a84d9bb964f9b58bf115a94e Mon Sep 17 00:00:00 2001 From: Sangam Paudel Date: Sat, 18 Oct 2025 08:58:21 +0545 Subject: [PATCH 2/3] Add verbose file-path listing and remove redundant main guard --- forklet/__main__.py | 8 +++----- forklet/core/orchestrator.py | 5 +++++ forklet/interfaces/cli.py | 32 ++++++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/forklet/__main__.py b/forklet/__main__.py index 1134ed6..70df997 100644 --- a/forklet/__main__.py +++ b/forklet/__main__.py @@ -107,7 +107,8 @@ async def run_download(): concurrent = concurrent, overwrite = overwrite, progress = not no_progress, - dry_run = dry_run + dry_run = dry_run, + verbose = ctx.obj.get('verbose', False) ) asyncio.run(run_download()) @@ -195,7 +196,4 @@ def version(): #### MAIN ENTRYPOINT FOR THE FORKLET CLI def main(): cli() - - -if __name__ == "__main__": - main() + diff --git a/forklet/core/orchestrator.py b/forklet/core/orchestrator.py index 4314357..6193f72 100644 --- a/forklet/core/orchestrator.py +++ b/forklet/core/orchestrator.py @@ -152,6 +152,8 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: progress=progress, started_at=datetime.now() ) + # Expose matched file paths for verbose reporting + result.matched_files = [f.path for f in target_files] self._current_result = result # If dry-run is explicitly requested, prepare a summary and return without writing files @@ -172,6 +174,7 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: result.skipped_files = skipped result.failed_files = {} result.completed_at = datetime.now() + # matched_files already set above; keep it for verbose output logger.info(f"Dry-run: {len(target_files)} files matched, {len(skipped)} would be skipped") return result @@ -194,6 +197,8 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: 
result.failed_files = failed_files result.cache_hits = stats.cache_hits result.api_calls_made = stats.api_calls + # Ensure matched_files remains available for verbose output + result.matched_files = [f.path for f in target_files] # Mark as completed stats.end_time = datetime.now() diff --git a/forklet/interfaces/cli.py b/forklet/interfaces/cli.py index 37cbf89..2a1110d 100644 --- a/forklet/interfaces/cli.py +++ b/forklet/interfaces/cli.py @@ -127,7 +127,8 @@ async def execute_download( concurrent: int, overwrite: bool, progress: bool = True, - dry_run: bool = False + dry_run: bool = False, + verbose: bool = False, ) -> None: """ Execute the download operation. @@ -177,9 +178,9 @@ async def execute_download( f"🚀 Starting download with {concurrent} concurrent workers..." ) result = await self.orchestrator.execute_download(request) - - # Display results - self.display_results(result) + + # Display results (pass through verbose flag) + self.display_results(result, verbose=verbose) except ( RateLimitError, AuthenticationError, @@ -193,7 +194,7 @@ async def execute_download( logger.exception("Unexpected error in download operation") sys.exit(1) - def display_results(self, result: DownloadResult) -> None: + def display_results(self, result: DownloadResult, verbose: bool = False) -> None: """ Display download results in a user-friendly format. 
@@ -208,9 +209,28 @@ def display_results(self, result: DownloadResult) -> None: if result.average_speed is not None: click.echo(f" ⚡ Speed: {result.average_speed:.2f} bytes/sec") - + if result.skipped_files: click.echo(f" ⏭️ Skipped: {len(result.skipped_files)} files") + + # When verbose, display file paths (matched / downloaded / skipped) + if verbose: + # Matched files (available in dry-run and set by orchestrator) + if hasattr(result, 'matched_files') and result.matched_files: + click.echo(" 🔎 Matched files:") + for p in result.matched_files: + click.echo(f" {p}") + + # For completed runs, show downloaded and skipped paths + if result.downloaded_files: + click.echo(" 📥 Downloaded paths:") + for p in result.downloaded_files: + click.echo(f" {p}") + + if result.skipped_files: + click.echo(" ⏭️ Skipped paths:") + for p in result.skipped_files: + click.echo(f" {p}") elif hasattr(result, 'failed_files') and result.failed_files: click.echo("⚠️ Download completed with errors:") From 0b2d1272ccabc54625b04b10f69e72565007895c Mon Sep 17 00:00:00 2001 From: Sangam Paudel Date: Sat, 18 Oct 2025 19:25:39 +0545 Subject: [PATCH 3/3] Remove unnecessary block --- forklet/__main__.py | 1 + forklet/core/orchestrator.py | 4 +--- forklet/models/download.py | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/forklet/__main__.py b/forklet/__main__.py index 70df997..3cb3c81 100644 --- a/forklet/__main__.py +++ b/forklet/__main__.py @@ -196,4 +196,5 @@ def version(): #### MAIN ENTRYPOINT FOR THE FORKLET CLI def main(): cli() + diff --git a/forklet/core/orchestrator.py b/forklet/core/orchestrator.py index 6193f72..be0f026 100644 --- a/forklet/core/orchestrator.py +++ b/forklet/core/orchestrator.py @@ -197,9 +197,7 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult: result.failed_files = failed_files result.cache_hits = stats.cache_hits result.api_calls_made = stats.api_calls - # Ensure matched_files remains available for verbose output - 
result.matched_files = [f.path for f in target_files] - + # Mark as completed stats.end_time = datetime.now() result.mark_completed() diff --git a/forklet/models/download.py b/forklet/models/download.py index 44a0b83..60bb228 100644 --- a/forklet/models/download.py +++ b/forklet/models/download.py @@ -192,6 +192,8 @@ class DownloadResult: downloaded_files: List[str] = field(default_factory=list) skipped_files: List[str] = field(default_factory=list) failed_files: Dict[str, str] = field(default_factory=dict) + # Matched file paths (populated by orchestrator for verbose reporting) + matched_files: List[str] = field(default_factory=list) # Metadata started_at: datetime = field(default_factory=datetime.now)