From 1ab6025d996069bb064d3c142610d3f85d5bacff Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:52:56 +0000 Subject: [PATCH] feat: Git history preservation (full clone) - Preserve complete Git history, not just the latest state. - Use a full clone by default to preserve history. - Add `--depth`, `--all-branches`, and `--all-tags` flags for more granular control. - Fix conflicting flag logic and add more thorough tests. Co-authored-by: Snider <631881+Snider@users.noreply.github.com> --- cmd/all.go | 5 +- cmd/collect_github_repo.go | 23 ++++- cmd/collect_github_repo_test.go | 33 +++++++ examples/all/main.go | 5 +- examples/collect_github_repo/main.go | 5 +- pkg/mocks/mock_vcs.go | 10 ++- pkg/vcs/git.go | 42 +++++++-- pkg/vcs/git_test.go | 129 +++++++++++++++++++++++++-- 8 files changed, 232 insertions(+), 20 deletions(-) diff --git a/cmd/all.go b/cmd/all.go index 84a06db..0de8327 100644 --- a/cmd/all.go +++ b/cmd/all.go @@ -61,7 +61,10 @@ func NewAllCmd() *cobra.Command { allDataNodes := datanode.New() for _, repoURL := range repos { - dn, err := cloner.CloneGitRepository(repoURL, progressWriter) + options := vcs.GitCloneOptions{ + FullHistory: true, // or some other default + } + dn, err := cloner.CloneGitRepository(repoURL, options, progressWriter) if err != nil { // Log the error and continue fmt.Fprintln(cmd.ErrOrStderr(), "Error cloning repository:", err) diff --git a/cmd/collect_github_repo.go b/cmd/collect_github_repo.go index c25df3b..99f5221 100644 --- a/cmd/collect_github_repo.go +++ b/cmd/collect_github_repo.go @@ -36,6 +36,14 @@ func NewCollectGithubRepoCmd() *cobra.Command { format, _ := cmd.Flags().GetString("format") compression, _ := cmd.Flags().GetString("compression") password, _ := cmd.Flags().GetString("password") + fullHistory, _ := cmd.Flags().GetBool("full-history") + depth, _ := cmd.Flags().GetInt("depth") + allBranches, _ := cmd.Flags().GetBool("all-branches") + allTags, _ := cmd.Flags().GetBool("all-tags") + + if depth > 0 { + fullHistory = false + } if format != "datanode" && format != "tim" && format != "trix" && format != "stim" { return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', 'trix', or 'stim')", format) @@ -54,7 +62,14 @@ func NewCollectGithubRepoCmd() *cobra.Command { progressWriter = ui.NewProgressWriter(bar) } - dn, err := GitCloner.CloneGitRepository(repoURL, progressWriter) + cloneOptions := vcs.GitCloneOptions{ + FullHistory: fullHistory, + Depth: depth, + AllBranches: allBranches, + AllTags: allTags, + } + + dn, err := GitCloner.CloneGitRepository(repoURL, cloneOptions, progressWriter) if err != nil { return fmt.Errorf("error cloning repository: %w", err) } @@ -118,6 +133,12 @@ func NewCollectGithubRepoCmd() *cobra.Command { cmd.Flags().String("format", "datanode", "Output format (datanode, tim, trix, or stim)") cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)") cmd.Flags().String("password", "", "Password for encryption (required for trix/stim)") + cmd.Flags().Bool("full-history", true, "Clone the full git history") + cmd.Flags().Int("depth", 0, "Depth for shallow clone") + cmd.Flags().Bool("all-branches", false, "Clone all branches") + cmd.Flags().Bool("all-tags", false, "Clone all tags") + cmd.Flags().Bool("lfs", false, "Clone LFS objects (not yet implemented)") + cmd.Flags().Bool("submodules", false, "Clone submodules (not yet implemented)") return cmd } diff --git a/cmd/collect_github_repo_test.go b/cmd/collect_github_repo_test.go index 9bf1d99..4f19990 100644 --- a/cmd/collect_github_repo_test.go +++ b/cmd/collect_github_repo_test.go @@ -65,3 +65,36 @@ func TestCollectGithubRepoCmd_Ugly(t *testing.T) { } }) } + +func TestCollectGithubRepoCmd_Flags(t *testing.T) { + // Setup mock Git cloner + mockCloner := mocks.NewMockGitCloner(datanode.New(), nil) + oldCloner := GitCloner + GitCloner = mockCloner + defer func() { + GitCloner = oldCloner + }() + + rootCmd := NewRootCmd() + rootCmd.AddCommand(GetCollectCmd()) + + // Execute command + out := filepath.Join(t.TempDir(), "out") + _, err := executeCommand(rootCmd, "collect", "github", "repo", "https://github.com/testuser/repo1", "--output", out, "--full-history=false", "--depth", "5", "--all-branches", "--all-tags") + if err != nil { + t.Fatalf("collect github repo command failed: %v", err) + } + + if mockCloner.Options.FullHistory { + t.Error("expected FullHistory to be false, but it was true") + } + if mockCloner.Options.Depth != 5 { + t.Errorf("expected Depth to be 5, but it was %d", mockCloner.Options.Depth) + } + if !mockCloner.Options.AllBranches { + t.Error("expected AllBranches to be true, but it was false") + } + if !mockCloner.Options.AllTags { + t.Error("expected AllTags to be true, but it was false") + } +} diff --git a/examples/all/main.go b/examples/all/main.go index 6411baa..9d2470e 100644 --- a/examples/all/main.go +++ b/examples/all/main.go @@ -22,7 +22,10 @@ func main() { for _, repo := range repos { log.Printf("Cloning %s...", repo) - dn, err := cloner.CloneGitRepository(fmt.Sprintf("https://github.com/%s", repo), nil) + options := vcs.GitCloneOptions{ + FullHistory: true, + } + dn, err := cloner.CloneGitRepository(fmt.Sprintf("https://github.com/%s", repo), options, nil) if err != nil { log.Printf("Failed to clone %s: %v", repo, err) continue diff --git a/examples/collect_github_repo/main.go b/examples/collect_github_repo/main.go index 0fad8ef..c7504f9 100644 --- a/examples/collect_github_repo/main.go +++ b/examples/collect_github_repo/main.go @@ -11,7 +11,10 @@ func main() { log.Println("Collecting GitHub repo...") cloner := vcs.NewGitCloner() - dn, err := cloner.CloneGitRepository("https://github.com/Snider/Borg", nil) + options := vcs.GitCloneOptions{ + FullHistory: true, + } + dn, err := cloner.CloneGitRepository("https://github.com/Snider/Borg", options, nil) if err != nil { log.Fatalf("Failed to clone repository: %v", err) } diff --git a/pkg/mocks/mock_vcs.go b/pkg/mocks/mock_vcs.go index 6c0890d..360e69d 100644 --- a/pkg/mocks/mock_vcs.go +++ b/pkg/mocks/mock_vcs.go @@ -9,12 +9,13 @@ import ( // MockGitCloner is a mock implementation of the GitCloner interface. type MockGitCloner struct { - DN *datanode.DataNode - Err error + DN *datanode.DataNode + Err error + Options vcs.GitCloneOptions } // NewMockGitCloner creates a new MockGitCloner. -func NewMockGitCloner(dn *datanode.DataNode, err error) vcs.GitCloner { +func NewMockGitCloner(dn *datanode.DataNode, err error) *MockGitCloner { return &MockGitCloner{ DN: dn, Err: err, @@ -22,6 +23,7 @@ func NewMockGitCloner(dn *datanode.DataNode, err error) vcs.GitCloner { } // CloneGitRepository mocks the cloning of a Git repository. -func (m *MockGitCloner) CloneGitRepository(repoURL string, progress io.Writer) (*datanode.DataNode, error) { +func (m *MockGitCloner) CloneGitRepository(repoURL string, options vcs.GitCloneOptions, progress io.Writer) (*datanode.DataNode, error) { + m.Options = options return m.DN, m.Err } diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 92e20aa..1dc7ac3 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -8,11 +8,20 @@ import ( "github.com/Snider/Borg/pkg/datanode" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" ) +// GitCloneOptions defines the options for cloning a Git repository. +type GitCloneOptions struct { + Depth int + AllBranches bool + AllTags bool + FullHistory bool +} + // GitCloner is an interface for cloning Git repositories. type GitCloner interface { - CloneGitRepository(repoURL string, progress io.Writer) (*datanode.DataNode, error) + CloneGitRepository(repoURL string, options GitCloneOptions, progress io.Writer) (*datanode.DataNode, error) } // NewGitCloner creates a new GitCloner. @@ -23,7 +32,7 @@ func NewGitCloner() GitCloner { type gitCloner struct{} // CloneGitRepository clones a Git repository from a URL and packages it into a DataNode. -func (g *gitCloner) CloneGitRepository(repoURL string, progress io.Writer) (*datanode.DataNode, error) { +func (g *gitCloner) CloneGitRepository(repoURL string, options GitCloneOptions, progress io.Writer) (*datanode.DataNode, error) { tempPath, err := os.MkdirTemp("", "borg-clone-*") if err != nil { return nil, err @@ -37,7 +46,15 @@ func (g *gitCloner) CloneGitRepository(repoURL string, progress io.Writer) (*dat cloneOptions.Progress = progress } - _, err = git.PlainClone(tempPath, false, cloneOptions) + if options.Depth > 0 { + cloneOptions.Depth = options.Depth + } + + if options.AllTags { + cloneOptions.Tags = git.AllTags + } + + repo, err := git.PlainClone(tempPath, false, cloneOptions) if err != nil { if err.Error() == "remote repository is empty" { return datanode.New(), nil @@ -45,13 +62,28 @@ func (g *gitCloner) CloneGitRepository(repoURL string, progress io.Writer) (*dat return nil, err } + if options.AllBranches { + remote, err := repo.Remote("origin") + if err != nil { + return nil, err + } + + err = remote.Fetch(&git.FetchOptions{ + RefSpecs: []config.RefSpec{"+refs/heads/*:refs/remotes/origin/*"}, + Progress: progress, + }) + if err != nil && err != git.NoErrAlreadyUpToDate { + return nil, err + } + } + dn := datanode.New() err = filepath.Walk(tempPath, func(path string, info os.FileInfo, err error) error { if err != nil { return err } - // Skip the .git directory - if info.IsDir() && info.Name() == ".git" { + // Skip the .git directory if we are not preserving history + if !options.FullHistory && info.IsDir() && info.Name() == ".git" { return filepath.SkipDir } if !info.IsDir() { diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index 190b647..ed7dfcf 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -31,6 +31,8 @@ func setupTestRepo(t *testing.T) (repoPath string) { defer os.RemoveAll(clonePath) runCmd(t, clonePath, "git", "clone", bareRepoPath, ".") + runCmd(t, clonePath, "git", "config", "user.email", "test@example.com") + runCmd(t, clonePath, "git", "config", "user.name", "Test User") // Create a file and commit it. filePath := filepath.Join(clonePath, "foo.txt") @@ -38,10 +40,20 @@ func setupTestRepo(t *testing.T) (repoPath string) { t.Fatalf("Failed to write file: %v", err) } runCmd(t, clonePath, "git", "add", "foo.txt") - runCmd(t, clonePath, "git", "config", "user.email", "test@example.com") - runCmd(t, clonePath, "git", "config", "user.name", "Test User") runCmd(t, clonePath, "git", "commit", "-m", "Initial commit") - runCmd(t, clonePath, "git", "push", "origin", "master") + runCmd(t, clonePath, "git", "tag", "v1.0") + + // Create a new branch and commit to it + runCmd(t, clonePath, "git", "checkout", "-b", "dev") + filePath2 := filepath.Join(clonePath, "bar.txt") + if err := os.WriteFile(filePath2, []byte("bar"), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + runCmd(t, clonePath, "git", "add", "bar.txt") + runCmd(t, clonePath, "git", "commit", "-m", "Dev commit") + + runCmd(t, clonePath, "git", "push", "origin", "master", "dev") + runCmd(t, clonePath, "git", "push", "origin", "--tags") return bareRepoPath } @@ -66,7 +78,101 @@ func TestCloneGitRepository_Good(t *testing.T) { cloner := NewGitCloner() var out bytes.Buffer - dn, err := cloner.CloneGitRepository("file://"+repoPath, &out) + options := GitCloneOptions{FullHistory: false} + dn, err := cloner.CloneGitRepository("file://"+repoPath, options, &out) + if err != nil { + t.Fatalf("CloneGitRepository failed: %v\nOutput: %s", err, out.String()) + } + + // Verify the DataNode contains the correct file. + exists, err := dn.Exists("foo.txt") + if err != nil { + t.Fatalf("Exists failed: %v", err) + } + if !exists { + t.Errorf("Expected to find file foo.txt in DataNode, but it was not found") + } + + // Verify the .git directory is NOT present. + exists, err = dn.Exists(".git/config") + if err != nil { + t.Fatalf("Exists failed for git config: %v", err) + } + if exists { + t.Errorf("Expected NOT to find file .git/config in DataNode for shallow clone, but it was found") + } +} + +func TestCloneGitRepository_FullHistory(t *testing.T) { + repoPath := setupTestRepo(t) + defer os.RemoveAll(repoPath) + + cloner := NewGitCloner() + var out bytes.Buffer + options := GitCloneOptions{FullHistory: true} + dn, err := cloner.CloneGitRepository("file://"+repoPath, options, &out) + if err != nil { + t.Fatalf("CloneGitRepository failed: %v\nOutput: %s", err, out.String()) + } + + // Verify the DataNode contains the correct file. + exists, err := dn.Exists("foo.txt") + if err != nil { + t.Fatalf("Exists failed: %v", err) + } + if !exists { + t.Errorf("Expected to find file foo.txt in DataNode, but it was not found") + } + + // Verify the .git directory IS present. + exists, err = dn.Exists(".git/config") + if err != nil { + t.Fatalf("Exists failed for git config: %v", err) + } + if !exists { + t.Errorf("Expected to find file .git/config in DataNode for full history clone, but it was not found") + } + + // Verify the dev branch file is NOT present + exists, err = dn.Exists("bar.txt") + if err != nil { + t.Fatalf("Exists failed for bar.txt: %v", err) + } + if exists { + t.Errorf("Expected NOT to find file bar.txt in DataNode for default clone, but it was found") + } +} + +func TestCloneGitRepository_AllBranches(t *testing.T) { + repoPath := setupTestRepo(t) + defer os.RemoveAll(repoPath) + + cloner := NewGitCloner() + var out bytes.Buffer + options := GitCloneOptions{FullHistory: true, AllBranches: true} + dn, err := cloner.CloneGitRepository("file://"+repoPath, options, &out) + if err != nil { + t.Fatalf("CloneGitRepository failed: %v\nOutput: %s", err, out.String()) + } + + // Verify the .git directory IS present. + exists, err := dn.Exists(".git/config") + if err != nil { + t.Fatalf("Exists failed for git config: %v", err) + } + if !exists { + t.Errorf("Expected to find file .git/config in DataNode for all branches clone, but it was not found") + } +} + +func TestCloneGitRepository_Depth(t *testing.T) { + repoPath := setupTestRepo(t) + defer os.RemoveAll(repoPath) + + cloner := NewGitCloner() + var out bytes.Buffer + options := GitCloneOptions{Depth: 1} + dn, err := cloner.CloneGitRepository("file://"+repoPath, options, &out) if err != nil { t.Fatalf("CloneGitRepository failed: %v\nOutput: %s", err, out.String()) } @@ -79,12 +185,21 @@ func TestCloneGitRepository_Good(t *testing.T) { if !exists { t.Errorf("Expected to find file foo.txt in DataNode, but it was not found") } + + // Verify the .git directory is NOT present. + exists, err = dn.Exists(".git/config") + if err != nil { + t.Fatalf("Exists failed for git config: %v", err) + } + if exists { + t.Errorf("Expected NOT to find file .git/config in DataNode for shallow clone, but it was found") + } } func TestCloneGitRepository_Bad(t *testing.T) { t.Run("Non-existent repository", func(t *testing.T) { cloner := NewGitCloner() - _, err := cloner.CloneGitRepository("file:///non-existent-repo", io.Discard) + _, err := cloner.CloneGitRepository("file:///non-existent-repo", GitCloneOptions{}, io.Discard) if err == nil { t.Fatal("Expected an error for a non-existent repository, but got nil") } @@ -95,7 +210,7 @@ func TestCloneGitRepository_Bad(t *testing.T) { t.Run("Invalid URL", func(t *testing.T) { cloner := NewGitCloner() - _, err := cloner.CloneGitRepository("not-a-valid-url", io.Discard) + _, err := cloner.CloneGitRepository("not-a-valid-url", GitCloneOptions{}, io.Discard) if err == nil { t.Fatal("Expected an error for an invalid URL, but got nil") } @@ -112,7 +227,7 @@ func TestCloneGitRepository_Ugly(t *testing.T) { runCmd(t, bareRepoPath, "git", "init", "--bare") cloner := NewGitCloner() - dn, err := cloner.CloneGitRepository("file://"+bareRepoPath, io.Discard) + dn, err := cloner.CloneGitRepository("file://"+bareRepoPath, GitCloneOptions{}, io.Discard) if err != nil { t.Fatalf("CloneGitRepository failed on empty repo: %v", err) }