Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ borg collect github repo <url> # Clone repository
borg collect github repos <owner> # Clone all repos from user/org
borg collect website <url> --depth 2 # Crawl website
borg collect pwa --uri <url> # Download PWA
borg collect npm <package> # Collect npm package
borg collect cargo <package> # Collect cargo crate
borg collect go <module> # Collect Go module

# Compilation
borg compile -f Borgfile -o out.tim # Plain TIM
Expand Down
61 changes: 61 additions & 0 deletions cmd/collect_cargo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package cmd

import (
"fmt"
"os"

"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)

// collectCargoCmd is the package-level "collect cargo" command. It is built
// once, at package initialization, by NewCollectCargoCmd and is the instance
// returned by GetCollectCargoCmd.
var collectCargoCmd = NewCollectCargoCmd()

// init attaches the cargo subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectCargoCmd())
}

// GetCollectCargoCmd returns the package-level collect cargo command.
func GetCollectCargoCmd() *cobra.Command {
return collectCargoCmd
}

// NewCollectCargoCmd builds the "cargo" subcommand.
//
// The command takes exactly one argument, the package name. It collects the
// package with collect.NewCargoCollector, serializes the resulting DataNode
// with ToTar, and writes the bytes to the path given by the "output" flag.
// Every failure is returned from RunE as a wrapped error.
func NewCollectCargoCmd() *cobra.Command {
// The local name shadows the package-level collectCargoCmd variable.
collectCargoCmd := &cobra.Command{
Use: "cargo [package]",
Short: "Collect a single cargo package",
Long: `Collect a single cargo package and store it in a DataNode.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
packageName := args[0]
outputFile, err := cmd.Flags().GetString("output")
if err != nil {
return fmt.Errorf("could not get output flag: %w", err)
}

collector := collect.NewCargoCollector()
dn, err := collector.Collect(packageName)
if err != nil {
return fmt.Errorf("error collecting cargo package: %w", err)
}

data, err := dn.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}

// No output path given: fall back to "<package>.dat".
if outputFile == "" {
outputFile = packageName + ".dat"
}

err = os.WriteFile(outputFile, data, 0644)
if err != nil {
return fmt.Errorf("error writing cargo package to file: %w", err)
}

// Report through the command's writer rather than os.Stdout directly.
fmt.Fprintln(cmd.OutOrStdout(), "Cargo package saved to", outputFile)
return nil
},
Comment on lines +28 to +57

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic within this RunE function is very similar to the RunE functions in collect_go.go and collect_npm.go. This duplication makes the code harder to maintain. Consider refactoring this common logic into a single, generic function. This function could be parameterized with the specifics for each collector, such as the collector creation logic and user-facing messages.

}
collectCargoCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
return collectCargoCmd
}
61 changes: 61 additions & 0 deletions cmd/collect_go.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package cmd

import (
"fmt"
"os"

"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)

// collectGoCmd is the package-level "collect go" command. It is built once,
// at package initialization, by NewCollectGoCmd and is the instance returned
// by GetCollectGoCmd.
var collectGoCmd = NewCollectGoCmd()

// init attaches the go subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectGoCmd())
}

// GetCollectGoCmd returns the package-level collect go command.
func GetCollectGoCmd() *cobra.Command {
return collectGoCmd
}

// NewCollectGoCmd builds the "go" subcommand.
//
// The command takes exactly one argument, the module path. It collects the
// module with collect.NewGoCollector, serializes the resulting DataNode with
// ToTar, and writes the bytes to the path given by the "output" flag.
// Every failure is returned from RunE as a wrapped error.
func NewCollectGoCmd() *cobra.Command {
// The local name shadows the package-level collectGoCmd variable.
collectGoCmd := &cobra.Command{
Use: "go [module]",
Short: "Collect a single Go module",
Long: `Collect a single Go module and store it in a DataNode.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
modulePath := args[0]
outputFile, err := cmd.Flags().GetString("output")
if err != nil {
return fmt.Errorf("could not get output flag: %w", err)
}

collector := collect.NewGoCollector()
dn, err := collector.Collect(modulePath)
if err != nil {
return fmt.Errorf("error collecting go module: %w", err)
}

data, err := dn.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}

// No output path given: fall back to "<module path>.dat".
// NOTE(review): modulePath is used verbatim. A path such as
// golang.org/x/text contains "/", so the default name points into nested
// directories, and os.WriteFile does not create missing parent directories.
if outputFile == "" {
outputFile = modulePath + ".dat"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The module path can contain characters like / which are invalid in filenames. This will cause os.WriteFile to fail when a default output file is being created. The module path should be sanitized to create a valid filename. For example, you could replace / with _.

Note: you will need to import the strings package.

Suggested change
outputFile = modulePath + ".dat"
outputFile = strings.ReplaceAll(modulePath, "/", "_") + ".dat"

}

err = os.WriteFile(outputFile, data, 0644)
if err != nil {
return fmt.Errorf("error writing go module to file: %w", err)
}

// Report through the command's writer rather than os.Stdout directly.
fmt.Fprintln(cmd.OutOrStdout(), "Go module saved to", outputFile)
return nil
},
}
collectGoCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
return collectGoCmd
}
61 changes: 61 additions & 0 deletions cmd/collect_npm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package cmd

import (
"fmt"
"os"

"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)

// collectNpmCmd is the package-level "collect npm" command. It is built once,
// at package initialization, by NewCollectNpmCmd and is the instance returned
// by GetCollectNpmCmd.
var collectNpmCmd = NewCollectNpmCmd()

// init attaches the npm subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectNpmCmd())
}

// GetCollectNpmCmd returns the package-level collect npm command.
func GetCollectNpmCmd() *cobra.Command {
return collectNpmCmd
}

// NewCollectNpmCmd builds the "npm" subcommand.
//
// The command takes exactly one argument, the package name. It collects the
// package with collect.NewNPMCollector, serializes the resulting DataNode
// with ToTar, and writes the bytes to the path given by the "output" flag.
// Every failure is returned from RunE as a wrapped error.
func NewCollectNpmCmd() *cobra.Command {
// The local name shadows the package-level collectNpmCmd variable.
collectNpmCmd := &cobra.Command{
Use: "npm [package]",
Short: "Collect a single npm package",
Long: `Collect a single npm package and store it in a DataNode.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
packageName := args[0]
outputFile, err := cmd.Flags().GetString("output")
if err != nil {
return fmt.Errorf("could not get output flag: %w", err)
}

collector := collect.NewNPMCollector()
dn, err := collector.Collect(packageName)
if err != nil {
return fmt.Errorf("error collecting npm package: %w", err)
}

data, err := dn.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}

// No output path given: fall back to "<package>.dat".
// NOTE(review): packageName is used verbatim. A scoped name such as
// @angular/cli contains "/", so the default name points into a
// subdirectory, and os.WriteFile does not create missing parent directories.
if outputFile == "" {
outputFile = packageName + ".dat"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

NPM package names can be scoped (e.g., @angular/cli), which contain characters like @ and / that are invalid or problematic in filenames. This will cause os.WriteFile to fail for scoped packages when a default output file is being created. The package name should be sanitized to create a valid filename.

For example, you could replace / with _ and remove the leading @.

Note: you will need to import the strings package.

Suggested change
outputFile = packageName + ".dat"
outputFile = strings.ReplaceAll(strings.TrimPrefix(packageName, "@"), "/", "_") + ".dat"

}

err = os.WriteFile(outputFile, data, 0644)
if err != nil {
return fmt.Errorf("error writing npm package to file: %w", err)
}

// Report through the command's writer rather than os.Stdout directly.
fmt.Fprintln(cmd.OutOrStdout(), "NPM package saved to", outputFile)
return nil
},
}
collectNpmCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
return collectNpmCmd
}
6 changes: 6 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,17 @@ Subcommands:
- `borg collect github repos <org-or-user> [--output <file>] [--format ...] [--compression ...]`
- `borg collect website <url> [--depth N] [--output <file>] [--format ...] [--compression ...]`
- `borg collect pwa --uri <url> [--output <file>] [--format ...] [--compression ...]`
- `borg collect npm <package-name> [--output <file>]`
- `borg collect cargo <crate-name> [--output <file>]`
- `borg collect go <module-name> [--output <file>]`

Examples:
- `borg collect github repo https://github.com/Snider/Borg --output borg.dat`
- `borg collect website https://example.com --depth 1 --output site.dat`
- `borg collect pwa --uri https://squoosh.app --output squoosh.dat`
- `borg collect npm @angular/cli --output angular-cli.dat`
- `borg collect cargo serde --output serde.dat`
- `borg collect go golang.org/x/text --output go-text.dat`

### all

Expand Down
114 changes: 114 additions & 0 deletions pkg/collect/cargo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package collect

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"

	"github.com/Snider/Borg/pkg/datanode"
)

// CargoRegistryURL is the base URL of the crates.io v1 API. Crate metadata is
// requested from "<CargoRegistryURL>/crates/<name>".
const CargoRegistryURL = "https://crates.io/api/v1"

// CargoCollector downloads a crate's metadata and version archives from the
// cargo registry and stores them in a DataNode.
type CargoCollector struct {
// client performs every HTTP request the collector makes.
client *http.Client
}

// NewCargoCollector creates a new CargoCollector backed by its own
// zero-value http.Client.
// NOTE(review): a zero-value http.Client has no Timeout, so a stalled
// registry connection can block a collection indefinitely — consider setting one.
func NewCargoCollector() *CargoCollector {
return &CargoCollector{
client: &http.Client{},

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with NewGoCollector and NewNPMCollector, which use http.DefaultClient, consider using http.DefaultClient here as well instead of creating a new http.Client{}. This promotes consistency across collectors and leverages the shared transport of the default client.

Suggested change
client: &http.Client{},
client: http.DefaultClient,

}
}

// Collect fetches the registry metadata for crateName and then the archive of
// every version listed in that metadata, returning them in a new DataNode.
//
// The metadata is stored under "metadata.json" and each archive under
// "<version>.crate". If the metadata request or any single version download
// fails, Collect returns a wrapped error and no DataNode.
func (c *CargoCollector) Collect(crateName string) (*datanode.DataNode, error) {
meta, err := c.fetchCrateMetadata(crateName)
if err != nil {
return nil, fmt.Errorf("could not fetch crate metadata: %w", err)
}

dn := datanode.New()
// metadata.json is re-encoded from the decoded CargoCrate value, so it only
// contains the fields that CargoCrate and its nested types declare.
metadata, err := json.MarshalIndent(meta, "", " ")
if err != nil {
return nil, fmt.Errorf("could not marshal metadata: %w", err)
}
dn.AddData("metadata.json", metadata)

// Download every listed version; the first failure aborts the whole run.
for _, version := range meta.Versions {
if err := c.fetchAndAddCrate(dn, version.DlPath, version.Num+".crate"); err != nil {
return nil, fmt.Errorf("could not fetch crate for version %s: %w", version.Num, err)
}
Comment on lines +42 to +44

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If fetching a single crate version fails, the entire collection process is aborted. This behavior is inconsistent with the NPMCollector, which logs the error and continues to fetch other versions. To provide a more robust user experience and align with the behavior of other collectors, consider logging the error and continuing the loop instead of returning an error immediately. This allows the collection of as many versions as possible, even if some fail.

Note: you will need to import the log package.

Suggested change
if err := c.fetchAndAddCrate(dn, version.DlPath, version.Num+".crate"); err != nil {
return nil, fmt.Errorf("could not fetch crate for version %s: %w", version.Num, err)
}
if err := c.fetchAndAddCrate(dn, version.DlPath, version.Num+".crate"); err != nil {
log.Printf("could not fetch crate for version %s: %v", version.Num, err)
}

}

return dn, nil
}

// fetchCrateMetadata requests the metadata document for crateName from the
// registry API and decodes it into a CargoCrate.
//
// crateName is path-escaped before it is placed in the URL, so characters
// such as "/", "?" or "#" stay inside the single crate-name path segment
// instead of changing the request path or adding a query string.
//
// It returns an error if the request cannot be built or sent, if the
// response status is not 200 OK, or if the body is not valid JSON for
// CargoCrate.
func (c *CargoCollector) fetchCrateMetadata(crateName string) (*CargoCrate, error) {
	endpoint := fmt.Sprintf("%s/crates/%s", CargoRegistryURL, url.PathEscape(crateName))

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	// Same User-Agent value that fetchAndAddCrate sends.
	req.Header.Set("User-Agent", "git/oxide-0.38.0")

	resp, err := c.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("bad status: %s", resp.Status)
	}

	var crate CargoCrate
	if err := json.NewDecoder(resp.Body).Decode(&crate); err != nil {
		return nil, err
	}
	return &crate, nil
}

// fetchAndAddCrate downloads the resource at downloadURL (a path appended to
// the https://crates.io origin) and stores the response body in dn under
// filename. A transport error, a non-200 status, or a body read failure is
// returned and nothing is added to dn.
func (c *CargoCollector) fetchAndAddCrate(dn *datanode.DataNode, downloadURL, filename string) error {
	target := "https://crates.io" + downloadURL

	request, err := http.NewRequest(http.MethodGet, target, nil)
	if err != nil {
		return err
	}
	request.Header.Set("User-Agent", "git/oxide-0.38.0")

	response, err := c.client.Do(request)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("bad status: %s", response.Status)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return err
	}

	dn.AddData(filename, payload)
	return nil
}

// CargoCrate is the decoded registry metadata response for one crate: the
// crate record itself plus the list of its versions.
type CargoCrate struct {
// Crate holds the crate-level fields of the response.
Crate CargoCrateData `json:"crate"`
// Versions lists the versions; Collect downloads one archive per entry.
Versions []CargoVersionData `json:"versions"`
}

// CargoCrateData holds the crate-level fields decoded from the "crate" object
// of the registry metadata response.
type CargoCrateData struct {
// Name is the value of the "name" field.
Name string `json:"name"`
}

// CargoVersionData represents the metadata for a specific version of a cargo crate.
type CargoVersionData struct {
// Num is the version string; Collect uses it to name the stored archive
// ("<Num>.crate").
Num string `json:"num"`
// DlPath is the download path for this version; it is appended to
// "https://crates.io" to form the download URL.
DlPath string `json:"dl_path"`
}
50 changes: 50 additions & 0 deletions pkg/collect/cargo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package collect

import (
"bytes"
"io"
"net/http"
"strings"
"testing"
)

// TestCargoCollector_Collect runs Collect against canned registry responses
// and checks that both the metadata document and the archive of the single
// listed version are present in the resulting DataNode.
func TestCargoCollector_Collect(t *testing.T) {
	const (
		metadataURL = "https://crates.io/api/v1/crates/monero-rs"
		downloadURL = "https://crates.io/api/v1/crates/monero-rs/0.1.0/download"
	)

	metadataBody := io.NopCloser(strings.NewReader(`{
"crate": {
"name": "monero-rs"
},
"versions": [
{
"num": "0.1.0",
"dl_path": "/api/v1/crates/monero-rs/0.1.0/download"
}
]
}`))
	crateBody := io.NopCloser(bytes.NewReader([]byte("crate content")))

	transport := &mockHTTPClient{
		responses: map[string]*http.Response{
			metadataURL: {StatusCode: http.StatusOK, Body: metadataBody},
			downloadURL: {StatusCode: http.StatusOK, Body: crateBody},
		},
	}
	collector := &CargoCollector{client: &http.Client{Transport: transport}}

	dn, err := collector.Collect("monero-rs")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Both the metadata document and the version archive must be stored.
	for _, name := range []string{"metadata.json", "0.1.0.crate"} {
		if _, err := dn.Stat(name); err != nil {
			t.Errorf("expected %s to exist", name)
		}
	}
}
Loading
Loading