Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions forklet/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def cli(ctx, verbose: bool, token: Optional[str]):
help='Download strategy')
@click.option('--concurrent', '-c', default=5, help='Concurrent downloads')
@click.option('--overwrite', '-f', is_flag=True, help='Overwrite existing files')
@click.option('--dry-run', '-n', is_flag=True, help='Preview files to download without writing')
@click.pass_context
def download(
ctx,
Expand All @@ -65,7 +66,8 @@ def download(
strategy: str,
concurrent: int,
overwrite: bool,
no_progress: bool
no_progress: bool,
dry_run: bool
):
"""
Download files from a GitHub repository.
Expand Down Expand Up @@ -104,7 +106,9 @@ async def run_download():
token = token,
concurrent = concurrent,
overwrite = overwrite,
progress = not no_progress
progress = not no_progress,
dry_run = dry_run,
verbose = ctx.obj.get('verbose', False)
)

asyncio.run(run_download())
Expand Down Expand Up @@ -192,3 +196,5 @@ def version():
#### MAIN ENTRYPOINT FOR THE FORKLET CLI
def main() -> None:
    """Console-script entry point for the Forklet CLI.

    Delegates straight to the Click command group ``cli``; Click handles
    argument parsing, sub-command dispatch, and process exit codes.
    """
    cli()


38 changes: 31 additions & 7 deletions forklet/core/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,43 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult:
f"Filtered {filter_result.filtered_files}/{filter_result.total_files} "
"files for download"
)

# Prepare destination
if request.create_destination:
await self.download_service.ensure_directory(request.destination)

# Create download result and set as current

# Create download result and set as current (so control operations can act)
result = DownloadResult(
request=request,
status=DownloadStatus.IN_PROGRESS,
progress=progress,
started_at=datetime.now()
)
# Expose matched file paths for verbose reporting
result.matched_files = [f.path for f in target_files]
self._current_result = result

# If dry-run is explicitly requested, prepare a summary and return without writing files
if getattr(request, 'dry_run', None) is True:
# Determine which files would be skipped due to existing local files
skipped = []
for f in target_files:
if request.preserve_structure:
target_path = request.destination / f.path
else:
target_path = request.destination / Path(f.path).name
if target_path.exists() and not request.overwrite_existing:
skipped.append(f.path)

# Update and return the result summarizing what would happen
result.status = DownloadStatus.COMPLETED
result.downloaded_files = []
result.skipped_files = skipped
result.failed_files = {}
result.completed_at = datetime.now()
# matched_files already set above; keep it for verbose output
logger.info(f"Dry-run: {len(target_files)} files matched, {len(skipped)} would be skipped")
return result

# Prepare destination
if request.create_destination:
await self.download_service.ensure_directory(request.destination)

# Reset state tracking
self._completed_files.clear()
Expand All @@ -173,7 +197,7 @@ async def execute_download(self, request: DownloadRequest) -> DownloadResult:
result.failed_files = failed_files
result.cache_hits = stats.cache_hits
result.api_calls_made = stats.api_calls

# Mark as completed
stats.end_time = datetime.now()
result.mark_completed()
Expand Down
34 changes: 28 additions & 6 deletions forklet/interfaces/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ async def execute_download(
token: Optional[str],
concurrent: int,
overwrite: bool,
progress: bool = True
progress: bool = True,
dry_run: bool = False,
verbose: bool = False,
) -> None:
"""
Execute the download operation.
Expand Down Expand Up @@ -168,16 +170,17 @@ async def execute_download(
max_concurrent_downloads = concurrent,
overwrite_existing = overwrite,
show_progress_bars = progress
,dry_run = dry_run
)

# Execute download
click.echo(
f"🚀 Starting download with {concurrent} concurrent workers..."
)
result = await self.orchestrator.execute_download(request)
# Display results
self.display_results(result)

# Display results (pass through verbose flag)
self.display_results(result, verbose=verbose)

except (
RateLimitError, AuthenticationError,
Expand All @@ -191,7 +194,7 @@ async def execute_download(
logger.exception("Unexpected error in download operation")
sys.exit(1)

def display_results(self, result: DownloadResult) -> None:
def display_results(self, result: DownloadResult, verbose: bool = False) -> None:
"""
Display download results in a user-friendly format.

Expand All @@ -206,9 +209,28 @@ def display_results(self, result: DownloadResult) -> None:

if result.average_speed is not None:
click.echo(f" ⚡ Speed: {result.average_speed:.2f} bytes/sec")

if result.skipped_files:
click.echo(f" ⏭️ Skipped: {len(result.skipped_files)} files")

# When verbose, display file paths (matched / downloaded / skipped)
if verbose:
# Matched files (available in dry-run and set by orchestrator)
if hasattr(result, 'matched_files') and result.matched_files:
click.echo(" 🔎 Matched files:")
for p in result.matched_files:
click.echo(f" {p}")

# For completed runs, show downloaded and skipped paths
if result.downloaded_files:
click.echo(" 📥 Downloaded paths:")
for p in result.downloaded_files:
click.echo(f" {p}")

if result.skipped_files:
click.echo(" ⏭️ Skipped paths:")
for p in result.skipped_files:
click.echo(f" {p}")

elif hasattr(result, 'failed_files') and result.failed_files:
click.echo("⚠️ Download completed with errors:")
Expand Down
5 changes: 5 additions & 0 deletions forklet/models/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ class DownloadRequest:
# Authentication
token: Optional[str] = None

# Dry-run preview mode (do not write files)
dry_run: bool = False

# Metadata
request_id: str = field(default_factory=lambda: f"req_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
created_at: datetime = field(default_factory=datetime.now)
Expand Down Expand Up @@ -189,6 +192,8 @@ class DownloadResult:
downloaded_files: List[str] = field(default_factory=list)
skipped_files: List[str] = field(default_factory=list)
failed_files: Dict[str, str] = field(default_factory=dict)
# Matched file paths (populated by orchestrator for verbose reporting)
matched_files: List[str] = field(default_factory=list)

# Metadata
started_at: datetime = field(default_factory=datetime.now)
Expand Down
71 changes: 71 additions & 0 deletions tests/core/test_orchestrator_dry_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Dry-run behaviour tests for ``DownloadOrchestrator``.

Verifies that ``execute_download()`` with ``dry_run=True`` reports matched
and would-be-skipped files without writing anything to the destination.
"""
import pytest
from datetime import datetime

from forklet.core.orchestrator import DownloadOrchestrator
from forklet.services.github_api import GitHubAPIService
from forklet.services.download import DownloadService
from forklet.infrastructure.retry_manager import RetryManager
from forklet.infrastructure.rate_limiter import RateLimiter
from forklet.models.download import DownloadRequest, FilterCriteria
from forklet.models.github import (
    GitHubFile,
    GitReference,
    RepositoryInfo,
    RepositoryType,
)


@pytest.mark.asyncio
async def test_orchestrator_dry_run(tmp_path, monkeypatch):
    """A dry-run matches files, flags existing ones as skipped, downloads nothing."""
    # Arrange: the files the GitHub API would report for the repository tree.
    files = [
        GitHubFile(path="src/main.py", type="blob", size=100, download_url="https://api.github.com/file1"),
        GitHubFile(path="README.md", type="blob", size=50, download_url="https://api.github.com/file2"),
    ]

    async def mock_get_repository_tree(owner, repo, ref):
        return files

    # Real service objects; only the network-facing tree fetch is patched out.
    rate_limiter = RateLimiter()
    retry_manager = RetryManager()
    github_service = GitHubAPIService(rate_limiter, retry_manager)
    download_service = DownloadService(retry_manager)

    monkeypatch.setattr(github_service, 'get_repository_tree', mock_get_repository_tree)

    orchestrator = DownloadOrchestrator(github_service, download_service)

    # Minimal repository metadata and git reference for the request.
    repo = RepositoryInfo(
        owner='test', name='repo', full_name='test/repo', url='https://github.com/test/repo',
        default_branch='main', repo_type=RepositoryType.PUBLIC, size=1,
        is_private=False, is_fork=False, created_at=datetime.now(), updated_at=datetime.now()
    )
    ref = GitReference(name='main', ref_type='branch', sha='abc')

    # Pre-create one destination file so the dry-run reports it as skipped.
    dest = tmp_path / "out"
    dest.mkdir()
    existing = dest / "README.md"
    existing.write_text("existing")

    request = DownloadRequest(
        repository=repo,
        git_ref=ref,
        destination=dest,
        strategy=None,
        filters=FilterCriteria(),
        dry_run=True
    )

    # Act
    result = await orchestrator.execute_download(request)

    # Assert: both files matched, nothing written, existing file flagged as skipped.
    assert result is not None
    assert result.progress.total_files == 2
    assert result.progress.total_bytes == 150
    # No files should be downloaded in dry-run
    assert result.downloaded_files == []
    # README.md should be reported as skipped
    assert "README.md" in result.skipped_files