-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Add collectors for npm, cargo, and go modules #89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "os" | ||
|
|
||
| "github.com/Snider/Borg/pkg/collect" | ||
| "github.com/spf13/cobra" | ||
| ) | ||
|
|
||
| // collectCargoCmd represents the collect cargo command | ||
| var collectCargoCmd = NewCollectCargoCmd() | ||
|
|
||
| func init() { | ||
| GetCollectCmd().AddCommand(GetCollectCargoCmd()) | ||
| } | ||
|
|
||
| func GetCollectCargoCmd() *cobra.Command { | ||
| return collectCargoCmd | ||
| } | ||
|
|
||
| func NewCollectCargoCmd() *cobra.Command { | ||
| collectCargoCmd := &cobra.Command{ | ||
| Use: "cargo [package]", | ||
| Short: "Collect a single cargo package", | ||
| Long: `Collect a single cargo package and store it in a DataNode.`, | ||
| Args: cobra.ExactArgs(1), | ||
| RunE: func(cmd *cobra.Command, args []string) error { | ||
| packageName := args[0] | ||
| outputFile, err := cmd.Flags().GetString("output") | ||
| if err != nil { | ||
| return fmt.Errorf("could not get output flag: %w", err) | ||
| } | ||
|
|
||
| collector := collect.NewCargoCollector() | ||
| dn, err := collector.Collect(packageName) | ||
| if err != nil { | ||
| return fmt.Errorf("error collecting cargo package: %w", err) | ||
| } | ||
|
|
||
| data, err := dn.ToTar() | ||
| if err != nil { | ||
| return fmt.Errorf("error serializing DataNode: %w", err) | ||
| } | ||
|
|
||
| if outputFile == "" { | ||
| outputFile = packageName + ".dat" | ||
| } | ||
|
|
||
| err = os.WriteFile(outputFile, data, 0644) | ||
| if err != nil { | ||
| return fmt.Errorf("error writing cargo package to file: %w", err) | ||
| } | ||
|
|
||
| fmt.Fprintln(cmd.OutOrStdout(), "Cargo package saved to", outputFile) | ||
| return nil | ||
| }, | ||
| } | ||
| collectCargoCmd.PersistentFlags().String("output", "", "Output file for the DataNode") | ||
| return collectCargoCmd | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,61 @@ | ||||||
| package cmd | ||||||
|
|
||||||
| import ( | ||||||
| "fmt" | ||||||
| "os" | ||||||
|
|
||||||
| "github.com/Snider/Borg/pkg/collect" | ||||||
| "github.com/spf13/cobra" | ||||||
| ) | ||||||
|
|
||||||
| // collectGoCmd represents the collect go command | ||||||
| var collectGoCmd = NewCollectGoCmd() | ||||||
|
|
||||||
| func init() { | ||||||
| GetCollectCmd().AddCommand(GetCollectGoCmd()) | ||||||
| } | ||||||
|
|
||||||
| func GetCollectGoCmd() *cobra.Command { | ||||||
| return collectGoCmd | ||||||
| } | ||||||
|
|
||||||
| func NewCollectGoCmd() *cobra.Command { | ||||||
| collectGoCmd := &cobra.Command{ | ||||||
| Use: "go [module]", | ||||||
| Short: "Collect a single Go module", | ||||||
| Long: `Collect a single Go module and store it in a DataNode.`, | ||||||
| Args: cobra.ExactArgs(1), | ||||||
| RunE: func(cmd *cobra.Command, args []string) error { | ||||||
| modulePath := args[0] | ||||||
| outputFile, err := cmd.Flags().GetString("output") | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("could not get output flag: %w", err) | ||||||
| } | ||||||
|
|
||||||
| collector := collect.NewGoCollector() | ||||||
| dn, err := collector.Collect(modulePath) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error collecting go module: %w", err) | ||||||
| } | ||||||
|
|
||||||
| data, err := dn.ToTar() | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error serializing DataNode: %w", err) | ||||||
| } | ||||||
|
|
||||||
| if outputFile == "" { | ||||||
| outputFile = modulePath + ".dat" | ||||||
|
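There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The module path can contain characters like `/` (for example `github.com/spf13/cobra`), so `modulePath + ".dat"` is not a safe default filename: `os.WriteFile` fails unless the intermediate directories already exist. Sanitize the module path before using it as a filename, for example by replacing `/` with `_`. Note: you will need to import the `strings` package.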
Suggested change
|
||||||
| } | ||||||
|
|
||||||
| err = os.WriteFile(outputFile, data, 0644) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error writing go module to file: %w", err) | ||||||
| } | ||||||
|
|
||||||
| fmt.Fprintln(cmd.OutOrStdout(), "Go module saved to", outputFile) | ||||||
| return nil | ||||||
| }, | ||||||
| } | ||||||
| collectGoCmd.PersistentFlags().String("output", "", "Output file for the DataNode") | ||||||
| return collectGoCmd | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,61 @@ | ||||||
| package cmd | ||||||
|
|
||||||
| import ( | ||||||
| "fmt" | ||||||
| "os" | ||||||
|
|
||||||
| "github.com/Snider/Borg/pkg/collect" | ||||||
| "github.com/spf13/cobra" | ||||||
| ) | ||||||
|
|
||||||
| // collectNpmCmd represents the collect npm command | ||||||
| var collectNpmCmd = NewCollectNpmCmd() | ||||||
|
|
||||||
| func init() { | ||||||
| GetCollectCmd().AddCommand(GetCollectNpmCmd()) | ||||||
| } | ||||||
|
|
||||||
| func GetCollectNpmCmd() *cobra.Command { | ||||||
| return collectNpmCmd | ||||||
| } | ||||||
|
|
||||||
| func NewCollectNpmCmd() *cobra.Command { | ||||||
| collectNpmCmd := &cobra.Command{ | ||||||
| Use: "npm [package]", | ||||||
| Short: "Collect a single npm package", | ||||||
| Long: `Collect a single npm package and store it in a DataNode.`, | ||||||
| Args: cobra.ExactArgs(1), | ||||||
| RunE: func(cmd *cobra.Command, args []string) error { | ||||||
| packageName := args[0] | ||||||
| outputFile, err := cmd.Flags().GetString("output") | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("could not get output flag: %w", err) | ||||||
| } | ||||||
|
|
||||||
| collector := collect.NewNPMCollector() | ||||||
| dn, err := collector.Collect(packageName) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error collecting npm package: %w", err) | ||||||
| } | ||||||
|
|
||||||
| data, err := dn.ToTar() | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error serializing DataNode: %w", err) | ||||||
| } | ||||||
|
|
||||||
| if outputFile == "" { | ||||||
| outputFile = packageName + ".dat" | ||||||
|
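There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NPM package names can be scoped (e.g., `@scope/name`), and the `/` in a scoped name makes `packageName + ".dat"` an unusable default filename unless a matching directory already exists. Sanitize the package name before building the filename. For example, you could replace `/` with `_`. Note: you will need to import the `strings` package.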
Suggested change
|
||||||
| } | ||||||
|
|
||||||
| err = os.WriteFile(outputFile, data, 0644) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error writing npm package to file: %w", err) | ||||||
| } | ||||||
|
|
||||||
| fmt.Fprintln(cmd.OutOrStdout(), "NPM package saved to", outputFile) | ||||||
| return nil | ||||||
| }, | ||||||
| } | ||||||
| collectNpmCmd.PersistentFlags().String("output", "", "Output file for the DataNode") | ||||||
| return collectNpmCmd | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,114 @@ | ||||||||||||||
| package collect | ||||||||||||||
|
|
||||||||||||||
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"

	"github.com/Snider/Borg/pkg/datanode"
)
|
|
||||||||||||||
const (
	// CargoRegistryURL is the base URL that crate metadata requests are built on.
	CargoRegistryURL = "https://crates.io/api/v1"
)

// CargoCollector collects cargo packages using the HTTP client it holds.
type CargoCollector struct {
	client *http.Client
}
|
|
||||||||||||||
| // NewCargoCollector creates a new CargoCollector. | ||||||||||||||
| func NewCargoCollector() *CargoCollector { | ||||||||||||||
| return &CargoCollector{ | ||||||||||||||
| client: &http.Client{}, | ||||||||||||||
|
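There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For consistency with […] (the rest of this comment and its suggested change were lost when the page was extracted; the comment is attached to the `client: &http.Client{},` line in `NewCargoCollector`).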
Suggested change
|
||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Collect fetches a cargo package and returns a DataNode. | ||||||||||||||
| func (c *CargoCollector) Collect(crateName string) (*datanode.DataNode, error) { | ||||||||||||||
| meta, err := c.fetchCrateMetadata(crateName) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, fmt.Errorf("could not fetch crate metadata: %w", err) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| dn := datanode.New() | ||||||||||||||
| metadata, err := json.MarshalIndent(meta, "", " ") | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, fmt.Errorf("could not marshal metadata: %w", err) | ||||||||||||||
| } | ||||||||||||||
| dn.AddData("metadata.json", metadata) | ||||||||||||||
|
|
||||||||||||||
| for _, version := range meta.Versions { | ||||||||||||||
| if err := c.fetchAndAddCrate(dn, version.DlPath, version.Num+".crate"); err != nil { | ||||||||||||||
| return nil, fmt.Errorf("could not fetch crate for version %s: %w", version.Num, err) | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines +42 to +44
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If fetching a single crate version fails, the entire collection process is aborted. This behavior is inconsistent with the […] (the name of the code being compared against was lost in extraction). Note: you will need to import the […] package (the package name was also lost in extraction).
Suggested change
|
||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return dn, nil | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| func (c *CargoCollector) fetchCrateMetadata(crateName string) (*CargoCrate, error) { | ||||||||||||||
| req, err := http.NewRequest("GET", fmt.Sprintf("%s/crates/%s", CargoRegistryURL, crateName), nil) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, err | ||||||||||||||
| } | ||||||||||||||
| req.Header.Set("User-Agent", "git/oxide-0.38.0") | ||||||||||||||
|
|
||||||||||||||
| resp, err := c.client.Do(req) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return nil, err | ||||||||||||||
| } | ||||||||||||||
| defer resp.Body.Close() | ||||||||||||||
|
|
||||||||||||||
| if resp.StatusCode != http.StatusOK { | ||||||||||||||
| return nil, fmt.Errorf("bad status: %s", resp.Status) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| var crate CargoCrate | ||||||||||||||
| if err := json.NewDecoder(resp.Body).Decode(&crate); err != nil { | ||||||||||||||
| return nil, err | ||||||||||||||
| } | ||||||||||||||
| return &crate, nil | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| func (c *CargoCollector) fetchAndAddCrate(dn *datanode.DataNode, downloadURL, filename string) error { | ||||||||||||||
| req, err := http.NewRequest("GET", fmt.Sprintf("https://crates.io%s", downloadURL), nil) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return err | ||||||||||||||
| } | ||||||||||||||
| req.Header.Set("User-Agent", "git/oxide-0.38.0") | ||||||||||||||
|
|
||||||||||||||
| resp, err := c.client.Do(req) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return err | ||||||||||||||
| } | ||||||||||||||
| defer resp.Body.Close() | ||||||||||||||
|
|
||||||||||||||
| if resp.StatusCode != http.StatusOK { | ||||||||||||||
| return fmt.Errorf("bad status: %s", resp.Status) | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| data, err := io.ReadAll(resp.Body) | ||||||||||||||
| if err != nil { | ||||||||||||||
| return err | ||||||||||||||
| } | ||||||||||||||
| dn.AddData(filename, data) | ||||||||||||||
| return nil | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
// CargoCrate is the top-level metadata document for a crate: the crate's own
// data (JSON key "crate") plus its list of versions (JSON key "versions").
type CargoCrate struct {
	Crate    CargoCrateData     `json:"crate"`
	Versions []CargoVersionData `json:"versions"`
}

// CargoCrateData holds the crate-level fields of the metadata document;
// only the name (JSON key "name") is decoded.
type CargoCrateData struct {
	Name string `json:"name"`
}

// CargoVersionData describes one version of a crate: its version number
// (JSON key "num") and its download path (JSON key "dl_path").
type CargoVersionData struct {
	Num    string `json:"num"`
	DlPath string `json:"dl_path"`
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| package collect | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "io" | ||
| "net/http" | ||
| "strings" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestCargoCollector_Collect(t *testing.T) { | ||
| client := &http.Client{ | ||
| Transport: &mockHTTPClient{ | ||
| responses: map[string]*http.Response{ | ||
| "https://crates.io/api/v1/crates/monero-rs": { | ||
| StatusCode: http.StatusOK, | ||
| Body: io.NopCloser(strings.NewReader(`{ | ||
| "crate": { | ||
| "name": "monero-rs" | ||
| }, | ||
| "versions": [ | ||
| { | ||
| "num": "0.1.0", | ||
| "dl_path": "/api/v1/crates/monero-rs/0.1.0/download" | ||
| } | ||
| ] | ||
| }`)), | ||
| }, | ||
| "https://crates.io/api/v1/crates/monero-rs/0.1.0/download": { | ||
| StatusCode: http.StatusOK, | ||
| Body: io.NopCloser(bytes.NewReader([]byte("crate content"))), | ||
| }, | ||
| }, | ||
| }, | ||
| } | ||
|
|
||
| collector := &CargoCollector{client: client} | ||
| dn, err := collector.Collect("monero-rs") | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
|
|
||
| if _, err := dn.Stat("metadata.json"); err != nil { | ||
| t.Errorf("expected metadata.json to exist") | ||
| } | ||
|
|
||
| if _, err := dn.Stat("0.1.0.crate"); err != nil { | ||
| t.Errorf("expected 0.1.0.crate to exist") | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The logic within this `RunE` function is very similar to the `RunE` functions in `collect_go.go` and `collect_npm.go`. This duplication makes the code harder to maintain. Consider refactoring this common logic into a single, generic function. This function could be parameterized with the specifics for each collector, such as the collector creation logic and user-facing messages.