From a7e6eef5672d36de300c82372c841f0d368c601c Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Sun, 12 Apr 2026 12:26:03 +0200 Subject: [PATCH 01/13] fix(cmd): harden cli test coverage paths Add command-level integration and fuzz coverage across parser flows. Fix config-driven structured output when no links are found or all links are ignored. Fix redirect auto-fixes for duplicate URLs across files and make filter tracking race-safe. --- cmd/check.go | 16 +- cmd/helpers_test.go | 337 ++++++++++++++++++ cmd/integration_test.go | 202 +++++++++++ internal/filter/filter.go | 26 +- internal/fixer/fixer.go | 25 +- internal/fixer/fixer_test.go | 28 ++ internal/parser/json/json_fuzz_test.go | 31 ++ .../parser/markdown/markdown_fuzz_test.go | 29 ++ internal/parser/parser_fuzz_test.go | 52 +++ internal/parser/registry_workflow_test.go | 119 +++++++ internal/parser/toml/toml_fuzz_test.go | 31 ++ internal/parser/xml/xml_fuzz_test.go | 31 ++ internal/parser/yaml/yaml_fuzz_test.go | 31 ++ 13 files changed, 946 insertions(+), 12 deletions(-) create mode 100644 cmd/helpers_test.go create mode 100644 cmd/integration_test.go create mode 100644 internal/parser/json/json_fuzz_test.go create mode 100644 internal/parser/markdown/markdown_fuzz_test.go create mode 100644 internal/parser/parser_fuzz_test.go create mode 100644 internal/parser/registry_workflow_test.go create mode 100644 internal/parser/toml/toml_fuzz_test.go create mode 100644 internal/parser/xml/xml_fuzz_test.go create mode 100644 internal/parser/yaml/yaml_fuzz_test.go diff --git a/cmd/check.go b/cmd/check.go index dca1f39..13f5949 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -268,7 +268,8 @@ func parseAndFilterLinksWithConfig( if len(parserLinks) == 0 { perf.EndParse(0, 0, 0, 0) effectiveShowStats := cfg.GetShowStats(showStats) - handleEmptyLinksWithStatsV2(files, useStructuredOutput, perf, effectiveShowStats) + effectiveFormat := cfg.GetOutputFormat(outputFormat) + handleEmptyLinksWithStatsV2(files, useStructuredOutput, perf, effectiveFormat, effectiveShowStats) return nil, nil, true } @@ -289,7 +290,8 @@ func parseAndFilterLinksWithConfig( if len(links) == 0 { effectiveShowStats := cfg.GetShowStats(showStats) - handleAllFilteredWithStatsV2(files, useStructuredOutput, urlFilter, perf, effectiveShowStats) + effectiveFormat := cfg.GetOutputFormat(outputFormat) + handleAllFilteredWithStatsV2(files, useStructuredOutput, urlFilter, perf, effectiveFormat, effectiveShowStats) return nil, urlFilter, true } @@ -357,10 +359,12 @@ func validateCheckFlags() error { } // handleEmptyLinksWithStatsV2 handles the case when no links are found, with config. -func handleEmptyLinksWithStatsV2(files []string, useStructuredOutput bool, perf *stats.Stats, effectiveShowStats bool) { +func handleEmptyLinksWithStatsV2( + files []string, useStructuredOutput bool, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, +) { switch { case useStructuredOutput: - handleStructuredOutputWithStatsV2(files, nil, checker.Summary{}, nil, perf, outputFormat, effectiveShowStats) + handleStructuredOutputWithStatsV2(files, nil, checker.Summary{}, nil, perf, effectiveFormat, effectiveShowStats) case outputFile != "": handleFileOutputWithStatsV2(files, nil, checker.Summary{}, nil, perf, effectiveShowStats) default: @@ -374,12 +378,12 @@ func handleEmptyLinksWithStatsV2(files []string, useStructuredOutput bool, perf // handleAllFilteredWithStatsV2 handles the case when all links were filtered out, with config. func handleAllFilteredWithStatsV2( files []string, useStructuredOutput bool, urlFilter *filter.Filter, - perf *stats.Stats, effectiveShowStats bool, + perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, ) { switch { case useStructuredOutput: handleStructuredOutputWithStatsV2( - files, nil, checker.Summary{}, urlFilter, perf, outputFormat, effectiveShowStats, + files, nil, checker.Summary{}, urlFilter, perf, effectiveFormat, effectiveShowStats, ) case outputFile != "": handleFileOutputWithStatsV2(files, nil, checker.Summary{}, urlFilter, perf, effectiveShowStats) diff --git a/cmd/helpers_test.go b/cmd/helpers_test.go new file mode 100644 index 0000000..1e1cd7e --- /dev/null +++ b/cmd/helpers_test.go @@ -0,0 +1,337 @@ +package cmd + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/config" + "github.com/leonardomso/gone/internal/filter" + "github.com/leonardomso/gone/internal/output" + "github.com/leonardomso/gone/internal/parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLoadConfig(t *testing.T) { + origWD, err := os.Getwd() + require.NoError(t, err) + + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, config.DefaultConfigFileName) + configContent := ` +types: [json, yaml] +scan: + include: ["docs/**"] + exclude: ["vendor/**"] +check: + concurrency: 17 + timeout: 9 + retries: 3 + strict: true +output: + format: yaml + showAlive: true +ignore: + domains: [example.com] +` + require.NoError(t, os.WriteFile(configPath, []byte(configContent), 0o644)) + require.NoError(t, os.Chdir(tmpDir)) + t.Cleanup(func() { + _ = os.Chdir(origWD) + }) + + loaded, err := LoadConfig(false) + require.NoError(t, err) + assert.Equal(t, []string{"json", "yaml"}, loaded.Config().Types) + assert.Equal(t, []string{"docs/**"}, loaded.Config().Scan.Include) + assert.Equal(t, 17, loaded.Config().Check.Concurrency) + assert.Equal(t, output.FormatYAML, output.Format(loaded.GetOutputFormat(""))) + + noCfg, err := LoadConfig(true) + require.NoError(t, err) + assert.True(t, noCfg.Config().IsEmpty()) +} + +func TestLoadConfig_InvalidConfig(t *testing.T) { + origWD, err := os.Getwd() + require.NoError(t, err) + + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, config.DefaultConfigFileName) + require.NoError(t, os.WriteFile(configPath, []byte("output:\n format: invalid\n"), 0o644)) + require.NoError(t, os.Chdir(tmpDir)) + t.Cleanup(func() { + _ = os.Chdir(origWD) + }) + + loaded, err := LoadConfig(false) + require.Error(t, err) + assert.Nil(t, loaded) + assert.Contains(t, err.Error(), "invalid config") +} + +func TestLoadedConfig_GettersAndBuilders(t *testing.T) { + t.Parallel() + + showWarnings := false + showDead := false + + loaded := &LoadedConfig{ + cfg: &config.Config{ + Types: []string{"json", "xml"}, + Scan: config.ScanConfig{ + Include: []string{"docs/**"}, + Exclude: []string{"vendor/**"}, + }, + Check: config.CheckConfig{ + Concurrency: 12, + Timeout: 7, + Retries: 4, + Strict: true, + }, + Output: config.OutputConfig{ + Format: "json", + ShowAlive: true, + ShowWarnings: &showWarnings, + ShowDead: &showDead, + ShowStats: true, + }, + }, + } + + assert.Equal(t, []string{"json", "xml"}, loaded.GetTypes([]string{"md"}, []string{"md"})) + assert.Equal(t, []string{"md"}, loaded.GetTypes([]string{"md"}, []string{"json"})) + assert.Equal(t, []string{"yaml"}, loaded.GetTypes([]string{"yaml"}, []string{"md"})) + + opts := loaded.BuildCheckerOptions(checker.DefaultConcurrency, int(checker.DefaultTimeout.Seconds()), checker.DefaultMaxRetries) + assert.Equal(t, 12, opts.Concurrency) + assert.Equal(t, 7*time.Second, opts.Timeout) + assert.Equal(t, 4, opts.MaxRetries) + + scanOpts := loaded.BuildScanOptions("/tmp/docs", []string{"md"}, []string{"md"}) + assert.Equal(t, "/tmp/docs", scanOpts.Root) + assert.Equal(t, []string{"json", "xml"}, scanOpts.Types) + assert.Equal(t, []string{"docs/**"}, scanOpts.Include) + assert.Equal(t, []string{"vendor/**"}, scanOpts.Exclude) + + assert.True(t, loaded.GetStrict(false)) + assert.Equal(t, "json", loaded.GetOutputFormat("")) + assert.True(t, loaded.GetShowAlive(false)) + assert.False(t, loaded.GetShowWarnings(false)) + assert.False(t, loaded.GetShowDead(false)) + assert.True(t, loaded.GetShowStats(false)) +} + +func TestCreateFilter(t *testing.T) { + origWD, err := os.Getwd() + require.NoError(t, err) + + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, config.DefaultConfigFileName) + configContent := ` +ignore: + domains: [config.example] + patterns: ["*.internal/*"] +` + require.NoError(t, os.WriteFile(configPath, []byte(configContent), 0o644)) + require.NoError(t, os.Chdir(tmpDir)) + t.Cleanup(func() { + _ = os.Chdir(origWD) + }) + + urlFilter, err := CreateFilter(FilterOptions{ + Domains: []string{"cli.example"}, + Patterns: []string{"*.cli/*"}, + Regex: []string{`example\.org`}, + }) + require.NoError(t, err) + require.NotNil(t, urlFilter) + + assert.True(t, urlFilter.ShouldIgnore("https://config.example/path", "README.md", 1)) + assert.True(t, urlFilter.ShouldIgnore("https://foo.cli/bar", "README.md", 2)) + assert.True(t, urlFilter.ShouldIgnore("https://service.internal/path", "README.md", 3)) + assert.True(t, urlFilter.ShouldIgnore("https://example.org/path", "README.md", 4)) + + noRules, err := CreateFilter(FilterOptions{NoConfig: true}) + require.NoError(t, err) + assert.Nil(t, noRules) +} + +func TestCreateFilterWithConfig(t *testing.T) { + t.Parallel() + + cfg := &config.Config{ + Ignore: config.IgnoreConfig{ + Domains: []string{"config.example"}, + Patterns: []string{"*.internal/*"}, + Regex: []string{`ignored\.org`}, + }, + } + + urlFilter, err := CreateFilterWithConfig( + cfg, + []string{"cli.example"}, + []string{"*.cli/*"}, + []string{`other\.org`}, + ) + require.NoError(t, err) + require.NotNil(t, urlFilter) + + assert.True(t, urlFilter.ShouldIgnore("https://config.example/path", "a.md", 1)) + assert.True(t, urlFilter.ShouldIgnore("https://service.cli/path", "a.md", 2)) + assert.True(t, urlFilter.ShouldIgnore("https://service.internal/path", "a.md", 3)) + assert.True(t, urlFilter.ShouldIgnore("https://ignored.org/path", "a.md", 4)) + assert.True(t, urlFilter.ShouldIgnore("https://other.org/path", "a.md", 5)) +} + +func TestCreateFilterWithConfig_NoRules(t *testing.T) { + t.Parallel() + + urlFilter, err := CreateFilterWithConfig(&config.Config{}, nil, nil, nil) + require.NoError(t, err) + assert.Nil(t, urlFilter) +} + +func TestLinkConversionAndFiltering(t *testing.T) { + t.Parallel() + + parserLinks := []parser.Link{ + {URL: "https://allowed.example", FilePath: "README.md", Line: 3, Text: "Allowed"}, + {URL: "https://ignored.example", FilePath: "README.md", Line: 5, Text: "Ignored"}, + {URL: "https://allowed.example", FilePath: "README.md", Line: 8, Text: "Duplicate"}, + } + + converted := ConvertParserLinks(parserLinks) + require.Len(t, converted, len(parserLinks)) + assert.Equal(t, parserLinks[0].URL, converted[0].URL) + assert.Equal(t, parserLinks[1].FilePath, converted[1].FilePath) + + urlFilter, err := filter.New(filter.Config{Domains: []string{"ignored.example"}}) + require.NoError(t, err) + + filtered := FilterParserLinks(parserLinks, urlFilter) + require.Len(t, filtered, 2) + assert.Equal(t, "https://allowed.example", filtered[0].URL) + assert.Equal(t, 1, urlFilter.IgnoredCount()) + assert.Equal(t, 1, CountUniqueURLs(filtered)) +} + +func TestFilterResultHelpers(t *testing.T) { + t.Parallel() + + primary := checker.Result{ + Link: checker.Link{URL: "https://primary.example", FilePath: "README.md", Line: 1}, + Status: checker.StatusRedirect, + } + + results := []checker.Result{ + {Link: checker.Link{URL: "https://alive.example"}, Status: checker.StatusAlive}, + primary, + {Link: checker.Link{URL: "https://blocked.example"}, Status: checker.StatusBlocked}, + {Link: checker.Link{URL: "https://dead.example"}, Status: checker.StatusDead}, + {Link: checker.Link{URL: "https://error.example"}, Status: checker.StatusError}, + {Link: checker.Link{URL: "https://duplicate.example"}, Status: checker.StatusDuplicate, DuplicateOf: &primary}, + } + + assert.Len(t, FilterResultsWarnings(results), 2) + assert.Len(t, FilterResultsDead(results), 2) + assert.Len(t, FilterResultsDuplicates(results), 1) + assert.Len(t, FilterResultsAlive(results), 1) + assert.Equal(t, 5, len(filterResults(results))) +} + +func TestCheckHelpersAndReportBuilding(t *testing.T) { + restore := saveCheckGlobalsForTest() + defer restore() + + outputFormat = "json" + outputFile = "" + showIgnored = true + showWarnings = true + + assert.NoError(t, validateCheckFlags()) + assert.Equal(t, ".", getPathArg(nil)) + assert.Equal(t, "docs", getPathArg([]string{"docs"})) + assert.NoError(t, validateFileTypes([]string{"md", "json", "yaml", "toml", "xml"})) + assert.Error(t, validateFileTypes([]string{"pdf"})) + assert.Equal(t, 0, getIgnoredCount(nil)) + + results := []checker.Result{ + { + Link: checker.Link{ + URL: "https://redirect.example", + FilePath: "README.md", + Line: 2, + }, + Status: checker.StatusRedirect, + StatusCode: 301, + FinalStatus: 200, + FinalURL: "https://final.example", + RedirectChain: []checker.Redirect{{URL: "https://redirect.example", StatusCode: 301}}, + }, + { + Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 4}, + Status: checker.StatusDead, + }, + } + summary := checker.Summarize(results) + + urlFilter, err := filter.New(filter.Config{Domains: []string{"ignored.example"}}) + require.NoError(t, err) + require.True(t, urlFilter.ShouldIgnore("https://ignored.example/path", "README.md", 7)) + assert.Equal(t, 1, getIgnoredCount(urlFilter)) + + report := buildReport([]string{"README.md"}, results, summary, urlFilter) + require.NotNil(t, report) + assert.Equal(t, summary.Total+1, report.TotalLinks) + require.Len(t, report.Ignored, 1) + assert.Equal(t, "https://ignored.example/path", report.Ignored[0].URL) + + assert.Equal(t, "301 → 200", formatRedirectChain(results[0])) + assert.Equal(t, "No warnings found.", getEmptyResultsMessage(checker.Summary{Alive: 2})) + + showWarnings = false + showDead = true + assert.Equal(t, "No dead links found.", getEmptyResultsMessage(checker.Summary{Alive: 2})) +} + +func TestValidateCheckFlags_InvalidCombinations(t *testing.T) { + restore := saveCheckGlobalsForTest() + defer restore() + + outputFormat = "json" + outputFile = "report.json" + err := validateCheckFlags() + require.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") + + outputFile = "" + outputFormat = "invalid" + err = validateCheckFlags() + require.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "invalid format")) +} + +func saveCheckGlobalsForTest() func() { + savedOutputFormat := outputFormat + savedOutputFile := outputFile + savedShowAlive := showAlive + savedShowWarnings := showWarnings + savedShowDead := showDead + savedShowAll := showAll + savedShowIgnored := showIgnored + + return func() { + outputFormat = savedOutputFormat + outputFile = savedOutputFile + showAlive = savedShowAlive + showWarnings = savedShowWarnings + showDead = savedShowDead + showAll = savedShowAll + showIgnored = savedShowIgnored + } +} diff --git a/cmd/integration_test.go b/cmd/integration_test.go new file mode 100644 index 0000000..6ec94e2 --- /dev/null +++ b/cmd/integration_test.go @@ -0,0 +1,202 @@ +//go:build integration + +package cmd_test + +import ( + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + buildBinaryOnce sync.Once + builtBinaryPath string + buildBinaryErr error +) + +func TestCheck_UsesConfigStructuredOutput_WhenNoLinksFound(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, ".gonerc.yaml"), []byte("output:\n format: json\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "README.md"), []byte("# No links here\n"), 0o644)) + + result := runGone(t, tmpDir, "check", ".") + require.Equal(t, 0, result.exitCode, result.stderr) + require.Empty(t, strings.TrimSpace(result.stderr)) + + var payload map[string]any + require.NoError(t, json.Unmarshal([]byte(result.stdout), &payload)) + assert.Equal(t, float64(1), payload["total_files"]) + assert.Equal(t, float64(0), payload["total_links"]) +} + +func TestCheck_UsesConfigStructuredOutput_WhenAllLinksIgnored(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + config := ` +output: + format: json +ignore: + domains: [ignored.example] +` + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, ".gonerc.yaml"), []byte(config), 0o644)) + require.NoError(t, os.WriteFile( + filepath.Join(tmpDir, "README.md"), + []byte("[ignored](https://ignored.example/path)\n"), + 0o644, + )) + + result := runGone(t, tmpDir, "check", ".", "--show-ignored") + require.Equal(t, 0, result.exitCode, result.stderr) + require.Empty(t, strings.TrimSpace(result.stderr)) + + var payload struct { + TotalLinks int `json:"total_links"` + Ignored []struct { + URL string `json:"url"` + } `json:"ignored"` + } + require.NoError(t, json.Unmarshal([]byte(result.stdout), &payload)) + assert.Equal(t, 1, payload.TotalLinks) + require.Len(t, payload.Ignored, 1) + assert.Equal(t, "https://ignored.example/path", payload.Ignored[0].URL) +} + +func TestFix_Yes_UpdatesRedirectsAcrossFileTypes(t *testing.T) { + t.Parallel() + + finalURL := "" + redirectServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/old": + http.Redirect(w, r, finalURL, http.StatusMovedPermanently) + case "/new": + w.WriteHeader(http.StatusOK) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer redirectServer.Close() + + finalURL = redirectServer.URL + "/new" + oldURL := redirectServer.URL + "/old" + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(tmpDir, "README.md"), + []byte("[docs]("+oldURL+")\n"), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(tmpDir, "links.json"), + []byte(`{"primary":"`+oldURL+`"}`), + 0o644, + )) + + result := runGone(t, tmpDir, "fix", ".", "--yes", "--types=md,json", "--no-config") + require.Equal(t, 0, result.exitCode, result.stderr) + assert.Contains(t, result.stdout, "Found 2 file(s) of type(s): md, json") + assert.Contains(t, result.stdout, "Fixed 2 redirect(s) across 2 file(s):") + + mdContent, err := os.ReadFile(filepath.Join(tmpDir, "README.md")) + require.NoError(t, err) + assert.NotContains(t, string(mdContent), oldURL) + assert.Contains(t, string(mdContent), finalURL) + + jsonContent, err := os.ReadFile(filepath.Join(tmpDir, "links.json")) + require.NoError(t, err) + assert.NotContains(t, string(jsonContent), oldURL) + assert.Contains(t, string(jsonContent), finalURL) +} + +type cliResult struct { + stdout string + stderr string + exitCode int +} + +func runGone(t *testing.T, dir string, args ...string) cliResult { + t.Helper() + + binaryPath := buildGoneBinary(t) + cmd := exec.Command(binaryPath, args...) + cmd.Dir = dir + output, err := cmd.CombinedOutput() + + result := cliResult{ + stdout: string(output), + exitCode: 0, + } + + if err == nil { + return result + } + + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + result.exitCode = exitErr.ExitCode() + result.stderr = string(output) + return result + } + + t.Fatalf("run gone: %v\noutput:\n%s", err, string(output)) + return cliResult{} +} + +func buildGoneBinary(t *testing.T) string { + t.Helper() + + buildBinaryOnce.Do(func() { + rootDir := repoRoot(t) + tmpDir, err := os.MkdirTemp("", "gone-bin-*") + if err != nil { + buildBinaryErr = err + return + } + + builtBinaryPath = filepath.Join(tmpDir, "gone") + if runtime.GOOS == "windows" { + builtBinaryPath += ".exe" + } + + cmd := exec.Command("go", "build", "-o", builtBinaryPath, ".") + cmd.Dir = rootDir + buildOutput, err := cmd.CombinedOutput() + if err != nil { + buildBinaryErr = &buildError{err: err, output: string(buildOutput)} + } + }) + + require.NoError(t, buildBinaryErr) + return builtBinaryPath +} + +type buildError struct { + err error + output string +} + +func (e *buildError) Error() string { + return e.err.Error() + "\n" + e.output +} + +func repoRoot(t *testing.T) string { + t.Helper() + + _, file, _, ok := runtime.Caller(0) + require.True(t, ok) + return filepath.Dir(filepath.Dir(file)) +} diff --git a/internal/filter/filter.go b/internal/filter/filter.go index 143f9ae..80f9e10 100644 --- a/internal/filter/filter.go +++ b/internal/filter/filter.go @@ -6,6 +6,7 @@ import ( "net/url" "regexp" "strings" + "sync" "github.com/gobwas/glob" ) @@ -33,6 +34,7 @@ type Filter struct { // Track ignored URLs for reporting ignored []IgnoreReason + mu sync.RWMutex } // compiledGlob holds a glob pattern and its original string for error reporting. @@ -117,7 +119,7 @@ func (f *Filter) ShouldIgnore(rawURL, file string, line int) bool { // Check domain first (O(1) lookup) if reason, ok := f.matchesDomain(rawURL); ok { - f.ignored = append(f.ignored, IgnoreReason{ + f.recordIgnored(IgnoreReason{ Type: "domain", Rule: reason, URL: rawURL, @@ -129,7 +131,7 @@ func (f *Filter) ShouldIgnore(rawURL, file string, line int) bool { // Check glob patterns if reason, ok := f.matchesGlob(rawURL); ok { - f.ignored = append(f.ignored, IgnoreReason{ + f.recordIgnored(IgnoreReason{ Type: "pattern", Rule: reason, URL: rawURL, @@ -141,7 +143,7 @@ func (f *Filter) ShouldIgnore(rawURL, file string, line int) bool { // Check regex patterns if reason, ok := f.matchesRegex(rawURL); ok { - f.ignored = append(f.ignored, IgnoreReason{ + f.recordIgnored(IgnoreReason{ Type: "regex", Rule: reason, URL: rawURL, @@ -154,6 +156,13 @@ func (f *Filter) ShouldIgnore(rawURL, file string, line int) bool { return false } +func (f *Filter) recordIgnored(reason IgnoreReason) { + f.mu.Lock() + defer f.mu.Unlock() + + f.ignored = append(f.ignored, reason) +} + // matchesDomain checks if the URL's domain matches any ignored domain. // Also checks if the URL's domain is a subdomain of an ignored domain. func (f *Filter) matchesDomain(rawURL string) (string, bool) { @@ -214,6 +223,8 @@ func (f *Filter) IgnoredCount() int { if f == nil { return 0 } + f.mu.RLock() + defer f.mu.RUnlock() return len(f.ignored) } @@ -222,13 +233,20 @@ func (f *Filter) IgnoredURLs() []IgnoreReason { if f == nil { return nil } - return f.ignored + f.mu.RLock() + defer f.mu.RUnlock() + + ignored := make([]IgnoreReason, len(f.ignored)) + copy(ignored, f.ignored) + return ignored } // Reset clears the list of ignored URLs. // Useful if reusing a filter for multiple checks. func (f *Filter) Reset() { if f != nil { + f.mu.Lock() + defer f.mu.Unlock() f.ignored = f.ignored[:0] } } diff --git a/internal/fixer/fixer.go b/internal/fixer/fixer.go index c49f0f9..e67cf01 100644 --- a/internal/fixer/fixer.go +++ b/internal/fixer/fixer.go @@ -70,11 +70,12 @@ func (f *Fixer) FindFixes(results []checker.Result) []FileChanges { urlToParserLink := f.buildURLToLinksMap() for _, r := range results { - if !isFixableRedirect(r) { + fixableResult, ok := normalizeFixableResult(r) + if !ok { continue } - f.addOrUpdateFix(fileFixMap, r, urlToParserLink) + f.addOrUpdateFix(fileFixMap, fixableResult, urlToParserLink) } return f.buildFileChanges(fileFixMap) @@ -97,6 +98,26 @@ func isFixableRedirect(r checker.Result) bool { r.FinalURL != r.Link.URL } +// normalizeFixableResult converts duplicate results back into fixable redirects +// when their primary occurrence was a redirect to a healthy final URL. +func normalizeFixableResult(r checker.Result) (checker.Result, bool) { + if isFixableRedirect(r) { + return r, true + } + + if r.Status != checker.StatusDuplicate || r.DuplicateOf == nil { + return checker.Result{}, false + } + + primary := *r.DuplicateOf + if !isFixableRedirect(primary) { + return checker.Result{}, false + } + + primary.Link = r.Link + return primary, true +} + // addOrUpdateFix adds a new fix or increments occurrence count for existing fix. func (f *Fixer) addOrUpdateFix( fileFixMap map[string]map[string]*Fix, diff --git a/internal/fixer/fixer_test.go b/internal/fixer/fixer_test.go index 6cfe88f..8419d4a 100644 --- a/internal/fixer/fixer_test.go +++ b/internal/fixer/fixer_test.go @@ -212,6 +212,34 @@ func TestFixer_FindFixes_DuplicateURLsInSameFile(t *testing.T) { assert.Equal(t, 2, changes[0].TotalFixes) } +func TestFixer_FindFixes_DuplicateResultsAcrossFiles(t *testing.T) { + t.Parallel() + + primary := checker.Result{ + Link: checker.Link{URL: "https://old.com", FilePath: "a.md", Line: 10}, + Status: checker.StatusRedirect, + FinalURL: "https://new.com", + FinalStatus: 200, + } + + results := []checker.Result{ + primary, + { + Link: checker.Link{URL: "https://old.com", FilePath: "b.json", Line: 5}, + Status: checker.StatusDuplicate, + DuplicateOf: &primary, + }, + } + + f := New() + changes := f.FindFixes(results) + + require.Len(t, changes, 2) + assert.Equal(t, "a.md", changes[0].FilePath) + assert.Equal(t, "b.json", changes[1].FilePath) + assert.Equal(t, "https://new.com", changes[1].Fixes[0].NewURL) +} + func TestFixer_FindFixes_SortedByLine(t *testing.T) { t.Parallel() diff --git a/internal/parser/json/json_fuzz_test.go b/internal/parser/json/json_fuzz_test.go new file mode 100644 index 0000000..070d374 --- /dev/null +++ b/internal/parser/json/json_fuzz_test.go @@ -0,0 +1,31 @@ +package jsonparser + +import ( + "testing" + + "github.com/leonardomso/gone/internal/parser" +) + +func FuzzJSONParserValidateAndParse(f *testing.F) { + f.Add([]byte(`{"url":"https://example.com"}`)) + f.Add([]byte(`{"nested":{"items":["https://example.com/a","prefix https://example.com/b"]}}`)) + f.Add([]byte(`{"broken":`)) + + p := New() + + f.Fuzz(func(t *testing.T, content []byte) { + links, err := p.ValidateAndParse("fuzz.json", content) + if err != nil { + return + } + + for _, link := range links { + if !parser.IsHTTPURL(link.URL) { + t.Fatalf("non-http URL returned: %#v", link) + } + if link.FilePath != "fuzz.json" { + t.Fatalf("unexpected file path: %#v", link) + } + } + }) +} diff --git a/internal/parser/markdown/markdown_fuzz_test.go b/internal/parser/markdown/markdown_fuzz_test.go new file mode 100644 index 0000000..5414bc0 --- /dev/null +++ b/internal/parser/markdown/markdown_fuzz_test.go @@ -0,0 +1,29 @@ +package markdown + +import ( + "testing" + + "github.com/leonardomso/gone/internal/parser" +) + +func FuzzExtractLinksFromContent(f *testing.F) { + f.Add([]byte("[docs](https://example.com/docs)\n")) + f.Add([]byte("Inline URL: https://example.com/path\n")) + f.Add([]byte("```go\nhttps://ignored.example\n```\nhtml\n")) + + f.Fuzz(func(t *testing.T, content []byte) { + links, err := ExtractLinksFromContent(content, "fuzz.md") + if err != nil { + t.Fatalf("markdown parsing should not error: %v", err) + } + + for _, link := range links { + if !parser.IsHTTPURL(link.URL) { + t.Fatalf("non-http URL returned: %#v", link) + } + if link.FilePath != "fuzz.md" { + t.Fatalf("unexpected file path: %#v", link) + } + } + }) +} diff --git a/internal/parser/parser_fuzz_test.go b/internal/parser/parser_fuzz_test.go new file mode 100644 index 0000000..0cde769 --- /dev/null +++ b/internal/parser/parser_fuzz_test.go @@ -0,0 +1,52 @@ +package parser + +import "testing" + +func FuzzCleanURLTrailing(f *testing.F) { + f.Add("https://example.com/path).") + f.Add("http://example.com/test],") + f.Add("") + + f.Fuzz(func(t *testing.T, input string) { + cleaned := CleanURLTrailing(input) + if len(cleaned) > len(input) { + t.Fatalf("cleaned URL grew: input=%q cleaned=%q", input, cleaned) + } + + if CleanURLTrailing(cleaned) != cleaned { + t.Fatalf("cleaned URL is not idempotent: %q", cleaned) + } + }) +} + +func FuzzBuildLineIndexAndOffsetToLineCol(f *testing.F) { + f.Add([]byte("line1\nline2\nline3"), uint(0)) + f.Add([]byte(""), uint(5)) + f.Add([]byte("single line"), uint(50)) + + f.Fuzz(func(t *testing.T, content []byte, rawOffset uint) { + lines := BuildLineIndex(content) + if len(lines) == 0 { + t.Fatal("line index must always include the first line") + } + + offset := 0 + if len(content) > 0 { + offset = int(rawOffset % uint(len(content))) + } + + line, col := OffsetToLineCol(lines, offset) + if line < 1 || col < 1 { + t.Fatalf("invalid position: line=%d col=%d", line, col) + } + + if line > len(lines) { + t.Fatalf("line out of bounds: line=%d lines=%d", line, len(lines)) + } + + start := lines[line-1] + if start+col-1 > len(content) { + t.Fatalf("column points past content: start=%d col=%d len=%d", start, col, len(content)) + } + }) +} diff --git a/internal/parser/registry_workflow_test.go b/internal/parser/registry_workflow_test.go new file mode 100644 index 0000000..8f6b5ea --- /dev/null +++ b/internal/parser/registry_workflow_test.go @@ -0,0 +1,119 @@ +package parser_test + +import ( + "os" + "path/filepath" + "sort" + "testing" + + "github.com/leonardomso/gone/internal/parser" + _ "github.com/leonardomso/gone/internal/parser/json" + _ "github.com/leonardomso/gone/internal/parser/markdown" + _ "github.com/leonardomso/gone/internal/parser/toml" + _ "github.com/leonardomso/gone/internal/parser/xml" + _ "github.com/leonardomso/gone/internal/parser/yaml" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExtractLinksWithRegistry(t *testing.T) { + t.Run("UnsupportedExtension", func(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "README.txt") + require.NoError(t, os.WriteFile(filePath, []byte("https://example.com"), 0o644)) + + links, err := parser.ExtractLinksWithRegistry(filePath, true) + require.Error(t, err) + assert.Nil(t, links) + assert.ErrorContains(t, err, "no parser registered") + }) + + t.Run("NonStrictSkipsMalformedFile", func(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "broken.json") + require.NoError(t, os.WriteFile(filePath, []byte(`{"url":`), 0o644)) + + links, err := parser.ExtractLinksWithRegistry(filePath, false) + require.NoError(t, err) + assert.Nil(t, links) + }) + + t.Run("StrictReturnsParseError", func(t *testing.T) { + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "broken.json") + require.NoError(t, os.WriteFile(filePath, []byte(`{"url":`), 0o644)) + + links, err := parser.ExtractLinksWithRegistry(filePath, true) + require.Error(t, err) + assert.Nil(t, links) + assert.ErrorContains(t, err, "invalid JSON") + }) +} + +func TestExtractLinksFromMultipleFilesWithRegistry(t *testing.T) { + t.Run("SequentialMixedSupportedAndUnsupportedFiles", func(t *testing.T) { + tmpDir := t.TempDir() + mdPath := filepath.Join(tmpDir, "README.md") + txtPath := filepath.Join(tmpDir, "notes.txt") + + require.NoError(t, os.WriteFile(mdPath, []byte("[docs](https://example.com/docs)\n"), 0o644)) + require.NoError(t, os.WriteFile(txtPath, []byte("https://ignored.example"), 0o644)) + + links, err := parser.ExtractLinksFromMultipleFilesWithRegistry([]string{mdPath, txtPath}, true) + require.NoError(t, err) + require.Len(t, links, 1) + assert.Equal(t, "https://example.com/docs", links[0].URL) + }) + + t.Run("ParallelAggregatesAcrossFormats", func(t *testing.T) { + tmpDir := t.TempDir() + files := map[string]string{ + filepath.Join(tmpDir, "README.md"): `[docs](https://example.com/md)` + "\n", + filepath.Join(tmpDir, "links.json"): `{"url":"https://example.com/json"}`, + filepath.Join(tmpDir, "config.yaml"): "url: https://example.com/yaml\n", + filepath.Join(tmpDir, "config.toml"): "url = 'https://example.com/toml'\n", + } + + for path, content := range files { + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + } + + paths := make([]string, 0, len(files)) + for path := range files { + paths = append(paths, path) + } + + links, err := parser.ExtractLinksFromMultipleFilesWithRegistry(paths, true) + require.NoError(t, err) + require.Len(t, links, 4) + + urls := make([]string, 0, len(links)) + for _, link := range links { + urls = append(urls, link.URL) + } + sort.Strings(urls) + + assert.Equal(t, []string{ + "https://example.com/json", + "https://example.com/md", + "https://example.com/toml", + "https://example.com/yaml", + }, urls) + }) + + t.Run("ParallelStrictReturnsFirstParseError", func(t *testing.T) { + tmpDir := t.TempDir() + validMD := filepath.Join(tmpDir, "README.md") + validYAML := filepath.Join(tmpDir, "config.yaml") + brokenJSON := filepath.Join(tmpDir, "broken.json") + + require.NoError(t, os.WriteFile(validMD, []byte("[docs](https://example.com)\n"), 0o644)) + require.NoError(t, os.WriteFile(validYAML, []byte("url: https://example.org\n"), 0o644)) + require.NoError(t, os.WriteFile(brokenJSON, []byte(`{"url":`), 0o644)) + + links, err := parser.ExtractLinksFromMultipleFilesWithRegistry([]string{validMD, validYAML, brokenJSON}, true) + require.Error(t, err) + assert.Nil(t, links) + assert.ErrorContains(t, err, "broken.json") + }) +} diff --git a/internal/parser/toml/toml_fuzz_test.go b/internal/parser/toml/toml_fuzz_test.go new file mode 100644 index 0000000..96a6d67 --- /dev/null +++ b/internal/parser/toml/toml_fuzz_test.go @@ -0,0 +1,31 @@ +package toml + +import ( + "testing" + + "github.com/leonardomso/gone/internal/parser" +) + +func FuzzTOMLParserValidateAndParse(f *testing.F) { + f.Add([]byte("url = 'https://example.com'\n")) + f.Add([]byte("[links]\nprimary = 'https://example.com/a'\nsecondary = 'prefix https://example.com/b'\n")) + f.Add([]byte("url = \n")) + + p := New() + + f.Fuzz(func(t *testing.T, content []byte) { + links, err := p.ValidateAndParse("fuzz.toml", content) + if err != nil { + return + } + + for _, link := range links { + if !parser.IsHTTPURL(link.URL) { + t.Fatalf("non-http URL returned: %#v", link) + } + if link.FilePath != "fuzz.toml" { + t.Fatalf("unexpected file path: %#v", link) + } + } + }) +} diff --git a/internal/parser/xml/xml_fuzz_test.go b/internal/parser/xml/xml_fuzz_test.go new file mode 100644 index 0000000..3367a31 --- /dev/null +++ b/internal/parser/xml/xml_fuzz_test.go @@ -0,0 +1,31 @@ +package xml + +import ( + "testing" + + "github.com/leonardomso/gone/internal/parser" +) + +func FuzzXMLParserValidateAndParse(f *testing.F) { + f.Add([]byte(`docs`)) + f.Add([]byte(`prefix https://example.com/path`)) + f.Add([]byte(``)) + + p := New() + + f.Fuzz(func(t *testing.T, content []byte) { + links, err := p.ValidateAndParse("fuzz.xml", content) + if err != nil { + return + } + + for _, link := range links { + if !parser.IsHTTPURL(link.URL) { + t.Fatalf("non-http URL returned: %#v", link) + } + if link.FilePath != "fuzz.xml" { + t.Fatalf("unexpected file path: %#v", link) + } + } + }) +} diff --git a/internal/parser/yaml/yaml_fuzz_test.go b/internal/parser/yaml/yaml_fuzz_test.go new file mode 100644 index 0000000..745bf78 --- /dev/null +++ b/internal/parser/yaml/yaml_fuzz_test.go @@ -0,0 +1,31 @@ +package yaml + +import ( + "testing" + + "github.com/leonardomso/gone/internal/parser" +) + +func FuzzYAMLParserValidateAndParse(f *testing.F) { + f.Add([]byte("url: https://example.com\n")) + f.Add([]byte("items:\n - prefix https://example.com/a\n - https://example.com/b\n")) + f.Add([]byte(":\n - invalid")) + + p := New() + + f.Fuzz(func(t *testing.T, content []byte) { + links, err := p.ValidateAndParse("fuzz.yaml", content) + if err != nil { + return + } + + for _, link := range links { + if !parser.IsHTTPURL(link.URL) { + t.Fatalf("non-http URL returned: %#v", link) + } + if link.FilePath != "fuzz.yaml" { + t.Fatalf("unexpected file path: %#v", link) + } + } + }) +} From 7206e0908a215e8e867d04ab2b5803b50820ba40 Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Sun, 12 Apr 2026 16:00:48 +0200 Subject: [PATCH 02/13] refactor(cmd): improve reliability and testability Refactor check and fix into injected runners, expand command and UI tests, add end-to-end benchmarks and fixtures, and adopt pond/conc in checker and parser hot paths. --- .github/workflows/test.yml | 11 +- README.md | 28 ++ cmd/check.go | 274 +------------ cmd/check_output.go | 101 +++-- cmd/check_print.go | 107 ++--- cmd/check_runner.go | 332 ++++++++++++++++ cmd/e2e_bench_test.go | 230 +++++++++++ cmd/fix.go | 307 +------------- cmd/fix_runner.go | 292 ++++++++++++++ cmd/helpers_test.go | 17 +- cmd/integration_test.go | 123 ++++++ cmd/runner_test.go | 373 ++++++++++++++++++ cmd/runtime.go | 78 ++++ go.mod | 10 +- go.sum | 24 +- internal/checker/checker.go | 117 +++--- internal/checker/main_test.go | 11 + internal/parser/parser.go | 59 +-- internal/scanner/scanner_test.go | 42 ++ internal/ui/app_test.go | 163 ++++++++ internal/ui/main_test.go | 11 + testdata/bench/duplicate-heavy/README.md | 12 + testdata/bench/duplicate-heavy/data.json | 7 + testdata/bench/http-scenarios/README.md | 6 + testdata/bench/http-scenarios/links.json | 6 + testdata/bench/http-scenarios/settings.yaml | 5 + testdata/bench/large/api/endpoints.json | 6 + testdata/bench/large/api/openapi.json | 9 + testdata/bench/large/config/base.yaml | 7 + testdata/bench/large/config/overrides.yaml | 6 + testdata/bench/large/data/regions.toml | 6 + testdata/bench/large/data/services.toml | 8 + testdata/bench/large/docs/README.md | 5 + testdata/bench/large/docs/architecture.md | 6 + testdata/bench/large/docs/faq.md | 6 + testdata/bench/large/feeds/events.xml | 5 + testdata/bench/large/feeds/releases.xml | 5 + testdata/bench/large/nested/deep/catalog.json | 7 + testdata/bench/large/nested/deep/guide.md | 5 + testdata/bench/medium/config/app.json | 6 + testdata/bench/medium/config/services.yaml | 7 + testdata/bench/medium/docs/guide.md | 5 + testdata/bench/medium/docs/tutorial.md | 5 + testdata/bench/medium/feed.xml | 5 + testdata/bench/medium/integrations.toml | 6 + testdata/bench/medium/nested/catalog.json | 7 + testdata/bench/medium/nested/reference.md | 3 + testdata/bench/small/README.md | 6 + testdata/bench/small/config.json | 5 + testdata/bench/small/feed.xml | 4 + testdata/bench/small/links.toml | 5 + testdata/bench/small/settings.yaml | 6 + 52 files changed, 2109 insertions(+), 788 deletions(-) create mode 100644 cmd/check_runner.go create mode 100644 cmd/e2e_bench_test.go create mode 100644 cmd/fix_runner.go create mode 100644 cmd/runner_test.go create mode 100644 cmd/runtime.go create mode 100644 internal/checker/main_test.go create mode 100644 internal/ui/app_test.go create mode 100644 internal/ui/main_test.go create mode 100644 testdata/bench/duplicate-heavy/README.md create mode 100644 testdata/bench/duplicate-heavy/data.json create mode 100644 testdata/bench/http-scenarios/README.md create mode 100644 testdata/bench/http-scenarios/links.json create mode 100644 testdata/bench/http-scenarios/settings.yaml create mode 100644 testdata/bench/large/api/endpoints.json create mode 100644 testdata/bench/large/api/openapi.json create mode 100644 testdata/bench/large/config/base.yaml create mode 100644 testdata/bench/large/config/overrides.yaml create mode 100644 testdata/bench/large/data/regions.toml create mode 100644 testdata/bench/large/data/services.toml create mode 100644 testdata/bench/large/docs/README.md create mode 100644 testdata/bench/large/docs/architecture.md create mode 100644 testdata/bench/large/docs/faq.md create mode 100644 testdata/bench/large/feeds/events.xml create mode 100644 testdata/bench/large/feeds/releases.xml create mode 100644 testdata/bench/large/nested/deep/catalog.json create mode 100644 testdata/bench/large/nested/deep/guide.md create mode 100644 testdata/bench/medium/config/app.json create mode 100644 testdata/bench/medium/config/services.yaml create mode 100644 testdata/bench/medium/docs/guide.md create mode 100644 testdata/bench/medium/docs/tutorial.md create mode 100644 testdata/bench/medium/feed.xml create mode 100644 testdata/bench/medium/integrations.toml create mode 100644 testdata/bench/medium/nested/catalog.json create mode 100644 testdata/bench/medium/nested/reference.md create mode 100644 testdata/bench/small/README.md create mode 100644 testdata/bench/small/config.json create mode 100644 testdata/bench/small/feed.xml create mode 100644 testdata/bench/small/links.toml create mode 100644 testdata/bench/small/settings.yaml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e17faa3..e299a98 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,5 +17,14 @@ jobs: with: go-version: '1.25.5' - - name: Run tests + - name: Run unit tests run: go test ./... -cover + + - name: Run race tests + run: go test -race ./... + + - name: Run integration tests + run: go test -tags=integration ./cmd + + - name: Run bounded fuzz jobs + run: go test ./internal/parser/... -run=^$ -fuzz=Fuzz -fuzztime=5s diff --git a/README.md b/README.md index 6d0fb2b..da455a8 100644 --- a/README.md +++ b/README.md @@ -424,6 +424,34 @@ jobs: path: report.junit.xml ``` +## Benchmarking + +Benchmark fixtures live under `testdata/bench` and cover: + +- small, medium, and large mixed-format trees +- duplicate-heavy URL sets +- redirect-heavy and error-heavy HTTP scenarios + +Use the end-to-end harness before changing concurrency or hot-path code: + +```bash +# Capture a baseline +go test -run '^$' -bench 'BenchmarkPipeline_' ./cmd | tee /tmp/gone-bench-before.txt + +# Compare parser and checker micro-benchmarks too +go test -run '^$' -bench . ./internal/parser ./internal/checker ./cmd | tee /tmp/gone-bench-current.txt + +# After refactors, compare runs with benchstat +benchstat /tmp/gone-bench-before.txt /tmp/gone-bench-after.txt +``` + +Guidelines: + +- Run benchmarks on an otherwise idle machine when possible. +- Keep `-count` consistent across before/after runs if you use it. +- Treat neutral-or-better results on the medium and large fixtures as the gate for concurrency refactors. +- Prefer changing one hot path at a time so regressions are easy to attribute. + ## Exit Codes | Code | Meaning | diff --git a/cmd/check.go b/cmd/check.go index 13f5949..754f227 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -7,10 +7,7 @@ import ( "github.com/leonardomso/gone/internal/checker" "github.com/leonardomso/gone/internal/filter" - "github.com/leonardomso/gone/internal/output" "github.com/leonardomso/gone/internal/parser" - "github.com/leonardomso/gone/internal/scanner" - "github.com/leonardomso/gone/internal/stats" "github.com/spf13/cobra" ) @@ -152,52 +149,9 @@ func init() { // runCheck is the main entry point for the check command. // It orchestrates the entire link checking workflow. func runCheck(_ *cobra.Command, args []string) { - perf := stats.New() - exitOnError(validateCheckFlags(), "Invalid flags") - - // Load configuration - loadedCfg, err := LoadConfig(noConfig) - exitOnError(err, "Config error") - - path := getPathArg(args) - - // Determine effective output format (CLI overrides config) - effectiveFormat := loadedCfg.GetOutputFormat(outputFormat) - useStructuredOutput := effectiveFormat != "" - - // Phase 1: Scan for files - files := scanFilesWithConfig(path, loadedCfg, perf, useStructuredOutput) - - // Phase 2: Parse links from files - links, urlFilter, done := parseAndFilterLinksWithConfig(files, loadedCfg, perf, useStructuredOutput) - if done { - return - } - - // Phase 3: Check URLs - results, summary := checkLinksWithConfig(links, loadedCfg, perf) - - // Phase 4: Output results - effectiveShowStats := loadedCfg.GetShowStats(showStats) - routeOutputWithConfig( - files, results, summary, urlFilter, perf, - useStructuredOutput, effectiveFormat, effectiveShowStats, - ) - - if summary.HasDeadLinks() { - os.Exit(1) - } -} - -// exitOnError prints an error message and exits if err is not nil. -func exitOnError(err error, message string) { - if err != nil { - if message != "" { - fmt.Fprintf(os.Stderr, "%s: %v\n", message, err) - } else { - fmt.Fprintf(os.Stderr, "%v\n", err) - } - os.Exit(1) + code := newCheckRunner(currentCheckOptions(), defaultCommandEnv(), defaultIOStreams()).Run(args) + if code != 0 { + os.Exit(code) } } @@ -209,32 +163,6 @@ func getPathArg(args []string) string { return "." } -// scanFilesWithConfig scans for files using config and CLI values. -func scanFilesWithConfig(path string, cfg *LoadedConfig, perf *stats.Stats, useStructuredOutput bool) []string { - perf.StartScan() - - // Get effective file types (CLI overrides config) - effectiveTypes := cfg.GetTypes(fileTypes, []string{"md"}) - - // Validate file types - if err := validateFileTypes(effectiveTypes); err != nil { - exitOnError(err, "Invalid file types") - } - - // Build scan options from config - scanOpts := cfg.BuildScanOptions(path, fileTypes, []string{"md"}) - - files, err := scanner.FindFilesWithOptions(scanOpts) - exitOnError(err, "Error scanning directory") - perf.EndScan(len(files)) - - if !useStructuredOutput { - typeStr := strings.Join(effectiveTypes, ", ") - fmt.Printf("Found %d file(s) of type(s): %s\n", len(files), typeStr) - } - return files -} - // validateFileTypes checks if all specified file types are supported. func validateFileTypes(types []string) error { supportedTypes := parser.SupportedFileTypes() @@ -252,52 +180,6 @@ func validateFileTypes(types []string) error { return nil } -// parseAndFilterLinksWithConfig extracts links from files and applies filters using config. -// Returns the links, filter, and whether processing should stop (done=true). -func parseAndFilterLinksWithConfig( - files []string, cfg *LoadedConfig, perf *stats.Stats, useStructuredOutput bool, -) ([]checker.Link, *filter.Filter, bool) { - perf.StartParse() - - // Get effective strict mode - effectiveStrict := cfg.GetStrict(strictMode) - - parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, effectiveStrict) - exitOnError(err, "Error parsing files") - - if len(parserLinks) == 0 { - perf.EndParse(0, 0, 0, 0) - effectiveShowStats := cfg.GetShowStats(showStats) - effectiveFormat := cfg.GetOutputFormat(outputFormat) - handleEmptyLinksWithStatsV2(files, useStructuredOutput, perf, effectiveFormat, effectiveShowStats) - return nil, nil, true - } - - // Create filter using config + CLI overrides - urlFilter, err := CreateFilterWithConfig(cfg.Config(), ignoreDomains, ignorePatterns, ignoreRegex) - exitOnError(err, "Error creating filter") - - links := FilterParserLinks(parserLinks, urlFilter) - ignoredCount := getIgnoredCount(urlFilter) - uniqueURLs := CountUniqueURLs(links) - duplicates := len(links) - uniqueURLs - - perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) - - if !useStructuredOutput { - printProgressMessage(len(parserLinks), len(links), uniqueURLs, duplicates, ignoredCount) - } - - if len(links) == 0 { - effectiveShowStats := cfg.GetShowStats(showStats) - effectiveFormat := cfg.GetOutputFormat(outputFormat) - handleAllFilteredWithStatsV2(files, useStructuredOutput, urlFilter, perf, effectiveFormat, effectiveShowStats) - return nil, urlFilter, true - } - - return links, urlFilter, false -} - // getIgnoredCount returns the ignored count from filter, or 0 if filter is nil. func getIgnoredCount(urlFilter *filter.Filter) int { if urlFilter != nil { @@ -305,153 +187,3 @@ func getIgnoredCount(urlFilter *filter.Filter) int { } return 0 } - -// checkLinksWithConfig checks all links using config values and returns results with summary. -func checkLinksWithConfig( - links []checker.Link, cfg *LoadedConfig, perf *stats.Stats, -) ([]checker.Result, checker.Summary) { - perf.StartCheck() - - opts := cfg.BuildCheckerOptions(concurrency, timeout, retries) - - c := checker.New(opts) - results := c.CheckAll(links) - summary := checker.Summarize(results) - - perf.EndCheck() - return results, summary -} - -// routeOutputWithConfig handles output based on format flags and config. -func routeOutputWithConfig( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, useStructuredOutput bool, - effectiveFormat string, effectiveShowStats bool, -) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStatsV2(files, results, summary, urlFilter, perf, effectiveFormat, effectiveShowStats) - case outputFile != "": - handleFileOutputWithStatsV2(files, results, summary, urlFilter, perf, effectiveShowStats) - default: - outputText(results, summary, urlFilter) - if effectiveShowStats { - fmt.Print(perf.String()) - } - } -} - -// validateCheckFlags checks for invalid flag combinations. -func validateCheckFlags() error { - // Validate mutually exclusive flags - if outputFormat != "" && outputFile != "" { - return fmt.Errorf("--format and --output are mutually exclusive; " + - "use --format for stdout output, or --output for file output") - } - - // Validate format if specified - if outputFormat != "" && !output.IsValidFormat(outputFormat) { - return fmt.Errorf("invalid format %q; valid formats: %s", - outputFormat, strings.Join(output.ValidFormats(), ", ")) - } - - return nil -} - -// handleEmptyLinksWithStatsV2 handles the case when no links are found, with config. -func handleEmptyLinksWithStatsV2( - files []string, useStructuredOutput bool, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, -) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStatsV2(files, nil, checker.Summary{}, nil, perf, effectiveFormat, effectiveShowStats) - case outputFile != "": - handleFileOutputWithStatsV2(files, nil, checker.Summary{}, nil, perf, effectiveShowStats) - default: - fmt.Println("No links found.") - if effectiveShowStats { - fmt.Print(perf.String()) - } - } -} - -// handleAllFilteredWithStatsV2 handles the case when all links were filtered out, with config. -func handleAllFilteredWithStatsV2( - files []string, useStructuredOutput bool, urlFilter *filter.Filter, - perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, -) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStatsV2( - files, nil, checker.Summary{}, urlFilter, perf, effectiveFormat, effectiveShowStats, - ) - case outputFile != "": - handleFileOutputWithStatsV2(files, nil, checker.Summary{}, urlFilter, perf, effectiveShowStats) - default: - fmt.Println("\nAll links were ignored by filter rules.") - if showIgnored && urlFilter != nil { - printIgnoredURLs(urlFilter) - } - if effectiveShowStats { - fmt.Print(perf.String()) - } - } -} - -// handleStructuredOutputWithStatsV2 outputs to stdout with optional stats, using config. -func handleStructuredOutputWithStatsV2( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, -) { - report := buildReportWithStatsV2(files, results, summary, urlFilter, perf, effectiveShowStats) - - data, err := output.FormatReport(report, output.Format(effectiveFormat)) - if err != nil { - fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err) - os.Exit(1) - } - - fmt.Print(string(data)) -} - -// handleFileOutputWithStatsV2 writes to file with optional stats, using config. -func handleFileOutputWithStatsV2( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, effectiveShowStats bool, -) { - report := buildReportWithStatsV2(files, results, summary, urlFilter, perf, effectiveShowStats) - - if err := output.WriteToFile(report, outputFile); err != nil { - fmt.Fprintf(os.Stderr, "Error writing file: %v\n", err) - os.Exit(1) - } - - fmt.Printf("Wrote report to %s\n", outputFile) - - // Also print summary to stdout - fmt.Printf("\nSummary: %d alive | %d warnings | %d dead | %d duplicates", - summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) - if urlFilter != nil && urlFilter.IgnoredCount() > 0 { - fmt.Printf(" | %d ignored", urlFilter.IgnoredCount()) - } - fmt.Println() - - if effectiveShowStats { - fmt.Print(perf.String()) - } -} - -// buildReportWithStatsV2 creates an output.Report with optional stats, using config. -func buildReportWithStatsV2( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, effectiveShowStats bool, -) *output.Report { - report := buildReport(files, results, summary, urlFilter) - - // Add stats if requested - if effectiveShowStats && perf != nil { - report.Stats = perf.ToJSON() - } - - return report -} diff --git a/cmd/check_output.go b/cmd/check_output.go index 0a694d6..017caee 100644 --- a/cmd/check_output.go +++ b/cmd/check_output.go @@ -2,38 +2,53 @@ package cmd import ( "fmt" + "io" "time" "github.com/leonardomso/gone/internal/checker" "github.com/leonardomso/gone/internal/filter" "github.com/leonardomso/gone/internal/output" + "github.com/samber/lo" ) +type checkRenderOptions struct { + ShowAlive bool + ShowWarnings bool + ShowDead bool + ShowAll bool + ShowIgnored bool +} + // buildReport creates an output.Report from check results. // This consolidates all data needed for formatted output. func buildReport( - files []string, results []checker.Result, summary checker.Summary, urlFilter *filter.Filter, + now time.Time, + files []string, + results []checker.Result, + summary checker.Summary, + urlFilter *filter.Filter, + opts checkRenderOptions, ) *output.Report { report := &output.Report{ - GeneratedAt: time.Now(), + GeneratedAt: now, Files: files, TotalLinks: summary.Total, UniqueURLs: summary.UniqueURLs, Summary: summary, - Results: filterResults(results), + Results: filterResults(results, opts), } // Add ignored URLs if filter is present and --show-ignored is set - if showIgnored && urlFilter != nil { - for _, ig := range urlFilter.IgnoredURLs() { - report.Ignored = append(report.Ignored, output.IgnoredURL{ + if opts.ShowIgnored && urlFilter != nil { + report.Ignored = lo.Map(urlFilter.IgnoredURLs(), func(ig filter.IgnoreReason, _ int) output.IgnoredURL { + return output.IgnoredURL{ URL: ig.URL, File: ig.File, Line: ig.Line, Reason: ig.Type, Rule: ig.Rule, - }) - } + } + }) } // Adjust total links to include ignored @@ -46,18 +61,18 @@ func buildReport( // filterResults returns results based on the filter flags. // This determines which results are included in the output based on CLI flags. -func filterResults(results []checker.Result) []checker.Result { +func filterResults(results []checker.Result, opts checkRenderOptions) []checker.Result { // If specific filter is set, use it - if showAlive { + if opts.ShowAlive { return checker.FilterAlive(results) } - if showWarnings { + if opts.ShowWarnings { return checker.FilterWarnings(results) } - if showDead { + if opts.ShowDead { return checker.FilterDead(results) } - if showAll { + if opts.ShowAll { return results } @@ -74,25 +89,25 @@ func filterResults(results []checker.Result) []checker.Result { // outputText prints results as human-readable text to stdout. // This is the default output mode when no format flag is specified. -func outputText(results []checker.Result, summary checker.Summary, urlFilter *filter.Filter) { +func outputText(w io.Writer, results []checker.Result, summary checker.Summary, urlFilter *filter.Filter, opts checkRenderOptions) { ignoredCount := getFilterIgnoredCount(urlFilter) - printSummaryLine(summary, ignoredCount) + printSummaryLine(w, summary, ignoredCount) - filtered := filterResults(results) + filtered := filterResults(results, opts) if len(filtered) == 0 { - fmt.Println(getEmptyResultsMessage(summary)) - maybeShowIgnored(urlFilter) + fmt.Fprintln(w, getEmptyResultsMessage(summary, opts)) + maybeShowIgnored(w, urlFilter, opts) return } - if shouldGroupResults() { - outputGroupedResults(filtered) + if shouldGroupResults(opts) { + outputGroupedResults(w, filtered, opts) } else { - outputFlatResults(filtered) + outputFlatResults(w, filtered) } - maybeShowIgnored(urlFilter) + maybeShowIgnored(w, urlFilter, opts) } // getFilterIgnoredCount returns the ignored count from filter, or 0 if nil. @@ -104,27 +119,27 @@ func getFilterIgnoredCount(urlFilter *filter.Filter) int { } // printSummaryLine prints the summary statistics line. -func printSummaryLine(summary checker.Summary, ignoredCount int) { - fmt.Println() +func printSummaryLine(w io.Writer, summary checker.Summary, ignoredCount int) { + fmt.Fprintln(w) if ignoredCount > 0 { - fmt.Printf("Summary: %d alive | %d warnings | %d dead | %d duplicates | %d ignored\n\n", + fmt.Fprintf(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates | %d ignored\n\n", summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates, ignoredCount) } else { - fmt.Printf("Summary: %d alive | %d warnings | %d dead | %d duplicates\n\n", + fmt.Fprintf(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates\n\n", summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) } } // getEmptyResultsMessage returns the appropriate message when no results match filters. -func getEmptyResultsMessage(summary checker.Summary) string { +func getEmptyResultsMessage(summary checker.Summary, opts checkRenderOptions) string { switch { - case showAlive && summary.Alive == 0: + case opts.ShowAlive && summary.Alive == 0: return "No alive links found." - case showWarnings && summary.WarningsCount() == 0: + case opts.ShowWarnings && summary.WarningsCount() == 0: return "No warnings found." - case showDead && !summary.HasDeadLinks(): + case opts.ShowDead && !summary.HasDeadLinks(): return "No dead links found." default: return "All links are alive!" @@ -132,31 +147,31 @@ func getEmptyResultsMessage(summary checker.Summary) string { } // shouldGroupResults returns true if results should be displayed in grouped sections. -func shouldGroupResults() bool { - return showAll || (!showAlive && !showWarnings && !showDead) +func shouldGroupResults(opts checkRenderOptions) bool { + return opts.ShowAll || (!opts.ShowAlive && !opts.ShowWarnings && !opts.ShowDead) } // outputGroupedResults prints results grouped by status sections. -func outputGroupedResults(filtered []checker.Result) { - printSection("Warnings", FilterResultsWarnings(filtered), printWarningResult) - printSection("Dead Links", FilterResultsDead(filtered), printDeadResult) - printSection("Duplicates", FilterResultsDuplicates(filtered), printDuplicateResult) +func outputGroupedResults(w io.Writer, filtered []checker.Result, opts checkRenderOptions) { + printSection(w, "Warnings", FilterResultsWarnings(filtered), printWarningResult) + printSection(w, "Dead Links", FilterResultsDead(filtered), printDeadResult) + printSection(w, "Duplicates", FilterResultsDuplicates(filtered), printDuplicateResult) - if showAll { - printSection("Alive", FilterResultsAlive(filtered), printAliveResult) + if opts.ShowAll { + printSection(w, "Alive", FilterResultsAlive(filtered), printAliveResult) } } // outputFlatResults prints results as a flat list. -func outputFlatResults(filtered []checker.Result) { +func outputFlatResults(w io.Writer, filtered []checker.Result) { for _, r := range filtered { - printResult(r) + printResult(w, r) } } // maybeShowIgnored shows ignored URLs if the flag is set and filter exists. -func maybeShowIgnored(urlFilter *filter.Filter) { - if showIgnored && urlFilter != nil { - printIgnoredURLs(urlFilter) +func maybeShowIgnored(w io.Writer, urlFilter *filter.Filter, opts checkRenderOptions) { + if opts.ShowIgnored && urlFilter != nil { + printIgnoredURLs(w, urlFilter) } } diff --git a/cmd/check_print.go b/cmd/check_print.go index c1ad7a1..379e56a 100644 --- a/cmd/check_print.go +++ b/cmd/check_print.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "io" "strings" "github.com/leonardomso/gone/internal/checker" @@ -11,7 +12,7 @@ import ( // printProgressMessage displays the scanning progress with ignore info. // Shows the total links found, unique URLs being checked, and counts for duplicates/ignored. -func printProgressMessage(total, afterFilter, unique, duplicates, ignored int) { +func printProgressMessage(w io.Writer, total, afterFilter, unique, duplicates, ignored int) { var parts []string if duplicates > 0 { @@ -22,131 +23,131 @@ func printProgressMessage(total, afterFilter, unique, duplicates, ignored int) { } if len(parts) > 0 { - fmt.Printf("Found %d link(s), checking %d unique URLs (%s)...\n", + fmt.Fprintf(w, "Found %d link(s), checking %d unique URLs (%s)...\n", total, unique, strings.Join(parts, ", ")) } else { - fmt.Printf("Found %d link(s), checking...\n", afterFilter) + fmt.Fprintf(w, "Found %d link(s), checking...\n", afterFilter) } } // printSection prints a titled section of results if any exist. // Uses the provided printer function to format each individual result. -func printSection(title string, results []checker.Result, printer func(checker.Result)) { +func printSection(w io.Writer, title string, results []checker.Result, printer func(io.Writer, checker.Result)) { if len(results) == 0 { return } - fmt.Printf("=== %s (%d) ===\n\n", title, len(results)) + fmt.Fprintf(w, "=== %s (%d) ===\n\n", title, len(results)) for _, r := range results { - printer(r) + printer(w, r) } - fmt.Println() + fmt.Fprintln(w) } // printResult dispatches to the appropriate printer based on result status. -func printResult(r checker.Result) { +func printResult(w io.Writer, r checker.Result) { switch r.Status { case checker.StatusAlive: - printAliveResult(r) + printAliveResult(w, r) case checker.StatusRedirect, checker.StatusBlocked: - printWarningResult(r) + printWarningResult(w, r) case checker.StatusDead, checker.StatusError: - printDeadResult(r) + printDeadResult(w, r) case checker.StatusDuplicate: - printDuplicateResult(r) + printDuplicateResult(w, r) } } // printAliveResult formats and prints a result with alive status. -func printAliveResult(r checker.Result) { - fmt.Printf(" [%d] %s\n", r.StatusCode, r.Link.URL) +func printAliveResult(w io.Writer, r checker.Result) { + fmt.Fprintf(w, " [%d] %s\n", r.StatusCode, r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Printf(" Text: %q\n", text) + fmt.Fprintf(w, " Text: %q\n", text) } - fmt.Printf(" File: %s", r.Link.FilePath) + fmt.Fprintf(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Printf(":%d", r.Link.Line) + fmt.Fprintf(w, ":%d", r.Link.Line) } - fmt.Println() - fmt.Println() + fmt.Fprintln(w) + fmt.Fprintln(w) } // printWarningResult formats and prints a result with warning status (redirect or blocked). -func printWarningResult(r checker.Result) { - fmt.Printf(" %s %s\n", r.StatusDisplay(), r.Link.URL) +func printWarningResult(w io.Writer, r checker.Result) { + fmt.Fprintf(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Printf(" Text: %q\n", text) + fmt.Fprintf(w, " Text: %q\n", text) } if r.Status == checker.StatusRedirect && len(r.RedirectChain) > 0 { - fmt.Printf(" Chain: %s\n", formatRedirectChain(r)) - fmt.Printf(" Final: %s\n", r.FinalURL) + fmt.Fprintf(w, " Chain: %s\n", formatRedirectChain(r)) + fmt.Fprintf(w, " Final: %s\n", r.FinalURL) } - fmt.Printf(" File: %s", r.Link.FilePath) + fmt.Fprintf(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Printf(":%d", r.Link.Line) + fmt.Fprintf(w, ":%d", r.Link.Line) } - fmt.Println() - fmt.Printf(" Note: %s\n\n", r.Status.Description()) + fmt.Fprintln(w) + fmt.Fprintf(w, " Note: %s\n\n", r.Status.Description()) } // printDeadResult formats and prints a result with dead or error status. -func printDeadResult(r checker.Result) { - fmt.Printf(" %s %s\n", r.StatusDisplay(), r.Link.URL) +func printDeadResult(w io.Writer, r checker.Result) { + fmt.Fprintf(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Printf(" Text: %q\n", text) + fmt.Fprintf(w, " Text: %q\n", text) } - fmt.Printf(" File: %s", r.Link.FilePath) + fmt.Fprintf(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Printf(":%d", r.Link.Line) + fmt.Fprintf(w, ":%d", r.Link.Line) } - fmt.Println() + fmt.Fprintln(w) if r.Error != "" { - fmt.Printf(" Error: %s\n", r.Error) + fmt.Fprintf(w, " Error: %s\n", r.Error) } - fmt.Println() + fmt.Fprintln(w) } // printDuplicateResult formats and prints a result that is a duplicate of another link. -func printDuplicateResult(r checker.Result) { - fmt.Printf(" [DUPLICATE] %s\n", r.Link.URL) +func printDuplicateResult(w io.Writer, r checker.Result) { + fmt.Fprintf(w, " [DUPLICATE] %s\n", r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Printf(" Text: %q\n", text) + fmt.Fprintf(w, " Text: %q\n", text) } - fmt.Printf(" File: %s", r.Link.FilePath) + fmt.Fprintf(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Printf(":%d", r.Link.Line) + fmt.Fprintf(w, ":%d", r.Link.Line) } - fmt.Println() + fmt.Fprintln(w) if r.DuplicateOf != nil { - fmt.Printf(" Same as: %s", r.DuplicateOf.Link.FilePath) + fmt.Fprintf(w, " Same as: %s", r.DuplicateOf.Link.FilePath) if r.DuplicateOf.Link.Line > 0 { - fmt.Printf(":%d", r.DuplicateOf.Link.Line) + fmt.Fprintf(w, ":%d", r.DuplicateOf.Link.Line) } - fmt.Printf(" → Status: %s\n", r.DuplicateOf.Status.Label()) + fmt.Fprintf(w, " → Status: %s\n", r.DuplicateOf.Status.Label()) } - fmt.Println() + fmt.Fprintln(w) } // printIgnoredURLs displays the list of URLs that were ignored by filter rules. -func printIgnoredURLs(urlFilter *filter.Filter) { +func printIgnoredURLs(w io.Writer, urlFilter *filter.Filter) { ignored := urlFilter.IgnoredURLs() if len(ignored) == 0 { return } - fmt.Printf("\n=== Ignored URLs (%d) ===\n\n", len(ignored)) + fmt.Fprintf(w, "\n=== Ignored URLs (%d) ===\n\n", len(ignored)) for _, ig := range ignored { - fmt.Printf(" [IGNORED] %s\n", ig.URL) - fmt.Printf(" File: %s", ig.File) + fmt.Fprintf(w, " [IGNORED] %s\n", ig.URL) + fmt.Fprintf(w, " File: %s", ig.File) if ig.Line > 0 { - fmt.Printf(":%d", ig.Line) + fmt.Fprintf(w, ":%d", ig.Line) } - fmt.Println() - fmt.Printf(" Reason: %s %q\n\n", ig.Type, ig.Rule) + fmt.Fprintln(w) + fmt.Fprintf(w, " Reason: %s %q\n\n", ig.Type, ig.Rule) } } diff --git a/cmd/check_runner.go b/cmd/check_runner.go new file mode 100644 index 0000000..58fde6b --- /dev/null +++ b/cmd/check_runner.go @@ -0,0 +1,332 @@ +package cmd + +import ( + "fmt" + "strings" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/filter" + "github.com/leonardomso/gone/internal/output" + "github.com/leonardomso/gone/internal/stats" +) + +type CheckOptions struct { + OutputFormat string + OutputFile string + Concurrency int + Timeout int + Retries int + ShowAlive bool + ShowWarnings bool + ShowDead bool + ShowAll bool + ShowStats bool + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + ShowIgnored bool + NoConfig bool +} + +type checkRunner struct { + opts CheckOptions + env CommandEnv + io IOStreams +} + +func newCheckRunner(opts CheckOptions, env CommandEnv, streams IOStreams) *checkRunner { + return &checkRunner{ + opts: opts, + env: env, + io: streams, + } +} + +func currentCheckOptions() CheckOptions { + return CheckOptions{ + OutputFormat: outputFormat, + OutputFile: outputFile, + Concurrency: concurrency, + Timeout: timeout, + Retries: retries, + ShowAlive: showAlive, + ShowWarnings: showWarnings, + ShowDead: showDead, + ShowAll: showAll, + ShowStats: showStats, + FileTypes: append([]string{}, fileTypes...), + StrictMode: strictMode, + IgnoreDomains: append([]string{}, ignoreDomains...), + IgnorePatterns: append([]string{}, ignorePatterns...), + IgnoreRegex: append([]string{}, ignoreRegex...), + ShowIgnored: showIgnored, + NoConfig: noConfig, + } +} + +func (r *checkRunner) renderOptions() checkRenderOptions { + return checkRenderOptions{ + ShowAlive: r.opts.ShowAlive, + ShowWarnings: r.opts.ShowWarnings, + ShowDead: r.opts.ShowDead, + ShowAll: r.opts.ShowAll, + ShowIgnored: r.opts.ShowIgnored, + } +} + +func (r *checkRunner) Run(args []string) int { + perf := r.env.NewStats() + if err := r.validateFlags(); err != nil { + return r.fail(1, "Invalid flags", err) + } + + loadedCfg, err := r.env.LoadConfig(r.opts.NoConfig) + if err != nil { + return r.fail(1, "Config error", err) + } + + path := getPathArg(args) + effectiveFormat := loadedCfg.GetOutputFormat(r.opts.OutputFormat) + useStructuredOutput := effectiveFormat != "" + + files, err := r.scanFilesWithConfig(path, loadedCfg, perf, useStructuredOutput) + if err != nil { + return r.fail(1, "Error scanning directory", err) + } + + links, urlFilter, done, err := r.parseAndFilterLinksWithConfig(files, loadedCfg, perf, useStructuredOutput) + if err != nil { + return r.fail(1, err.Error(), nil) + } + if done { + return 0 + } + + results, summary := r.checkLinksWithConfig(links, loadedCfg, perf) + + if err := r.routeOutputWithConfig( + files, results, summary, urlFilter, perf, + useStructuredOutput, effectiveFormat, loadedCfg.GetShowStats(r.opts.ShowStats), + ); err != nil { + return r.fail(1, err.Error(), nil) + } + + if summary.HasDeadLinks() { + return 1 + } + return 0 +} + +func (r *checkRunner) fail(code int, message string, err error) int { + if err != nil { + if message != "" { + fmt.Fprintf(r.io.ErrOut, "%s: %v\n", message, err) + } else { + fmt.Fprintf(r.io.ErrOut, "%v\n", err) + } + return code + } + + if message != "" { + fmt.Fprintln(r.io.ErrOut, message) + } + return code +} + +func (r *checkRunner) validateFlags() error { + if r.opts.OutputFormat != "" && r.opts.OutputFile != "" { + return fmt.Errorf("--format and --output are mutually exclusive; " + + "use --format for stdout output, or --output for file output") + } + + if r.opts.OutputFormat != "" && !output.IsValidFormat(r.opts.OutputFormat) { + return fmt.Errorf("invalid format %q; valid formats: %s", + r.opts.OutputFormat, strings.Join(output.ValidFormats(), ", ")) + } + + return nil +} + +func (r *checkRunner) scanFilesWithConfig( + path string, cfg *LoadedConfig, perf *stats.Stats, useStructuredOutput bool, +) ([]string, error) { + perf.StartScan() + + effectiveTypes := cfg.GetTypes(r.opts.FileTypes, []string{"md"}) + if err := validateFileTypes(effectiveTypes); err != nil { + return nil, fmt.Errorf("Invalid file types: %w", err) + } + + scanOpts := cfg.BuildScanOptions(path, r.opts.FileTypes, []string{"md"}) + files, err := r.env.FindFiles(scanOpts) + if err != nil { + return nil, err + } + + perf.EndScan(len(files)) + + if !useStructuredOutput { + fmt.Fprintf(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) + } + + return files, nil +} + +func (r *checkRunner) parseAndFilterLinksWithConfig( + files []string, cfg *LoadedConfig, perf *stats.Stats, useStructuredOutput bool, +) ([]checker.Link, *filter.Filter, bool, error) { + perf.StartParse() + + parserLinks, err := r.env.ExtractLinks(files, cfg.GetStrict(r.opts.StrictMode)) + if err != nil { + return nil, nil, false, fmt.Errorf("Error parsing files: %w", err) + } + + if len(parserLinks) == 0 { + perf.EndParse(0, 0, 0, 0) + if err := r.handleEmptyLinksWithStats(files, useStructuredOutput, perf, cfg.GetOutputFormat(r.opts.OutputFormat), cfg.GetShowStats(r.opts.ShowStats)); err != nil { + return nil, nil, false, fmt.Errorf("Error formatting output: %w", err) + } + return nil, nil, true, nil + } + + urlFilter, err := r.env.CreateFilterWithConfig(cfg.Config(), r.opts.IgnoreDomains, r.opts.IgnorePatterns, r.opts.IgnoreRegex) + if err != nil { + return nil, nil, false, fmt.Errorf("Error creating filter: %w", err) + } + + links := FilterParserLinks(parserLinks, urlFilter) + ignoredCount := getIgnoredCount(urlFilter) + uniqueURLs := CountUniqueURLs(links) + duplicates := len(links) - uniqueURLs + perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) + + if !useStructuredOutput { + printProgressMessage(r.io.Out, len(parserLinks), len(links), uniqueURLs, duplicates, ignoredCount) + } + + if len(links) == 0 { + if err := r.handleAllFilteredWithStats(files, useStructuredOutput, urlFilter, perf, cfg.GetOutputFormat(r.opts.OutputFormat), cfg.GetShowStats(r.opts.ShowStats)); err != nil { + return nil, nil, false, fmt.Errorf("Error formatting output: %w", err) + } + return nil, urlFilter, true, nil + } + + return links, urlFilter, false, nil +} + +func (r *checkRunner) checkLinksWithConfig( + links []checker.Link, cfg *LoadedConfig, perf *stats.Stats, +) ([]checker.Result, checker.Summary) { + perf.StartCheck() + c := r.env.NewChecker(cfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries)) + results := c.CheckAll(links) + summary := checker.Summarize(results) + perf.EndCheck() + return results, summary +} + +func (r *checkRunner) routeOutputWithConfig( + files []string, results []checker.Result, summary checker.Summary, + urlFilter *filter.Filter, perf *stats.Stats, useStructuredOutput bool, + effectiveFormat string, effectiveShowStats bool, +) error { + switch { + case useStructuredOutput: + return r.handleStructuredOutputWithStats(files, results, summary, urlFilter, perf, effectiveFormat, effectiveShowStats) + case r.opts.OutputFile != "": + return r.handleFileOutputWithStats(files, results, summary, urlFilter, perf, effectiveShowStats) + default: + outputText(r.io.Out, results, summary, urlFilter, r.renderOptions()) + if effectiveShowStats { + fmt.Fprint(r.io.Out, perf.String()) + } + return nil + } +} + +func (r *checkRunner) handleEmptyLinksWithStats( + files []string, useStructuredOutput bool, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, +) error { + switch { + case useStructuredOutput: + return r.handleStructuredOutputWithStats(files, nil, checker.Summary{}, nil, perf, effectiveFormat, effectiveShowStats) + case r.opts.OutputFile != "": + return r.handleFileOutputWithStats(files, nil, checker.Summary{}, nil, perf, effectiveShowStats) + default: + fmt.Fprintln(r.io.Out, "No links found.") + if effectiveShowStats { + fmt.Fprint(r.io.Out, perf.String()) + } + return nil + } +} + +func (r *checkRunner) handleAllFilteredWithStats( + files []string, useStructuredOutput bool, urlFilter *filter.Filter, + perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, +) error { + switch { + case useStructuredOutput: + return r.handleStructuredOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf, effectiveFormat, effectiveShowStats) + case r.opts.OutputFile != "": + return r.handleFileOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf, effectiveShowStats) + default: + fmt.Fprintln(r.io.Out, "\nAll links were ignored by filter rules.") + if r.opts.ShowIgnored && urlFilter != nil { + printIgnoredURLs(r.io.Out, urlFilter) + } + if effectiveShowStats { + fmt.Fprint(r.io.Out, perf.String()) + } + return nil + } +} + +func (r *checkRunner) handleStructuredOutputWithStats( + files []string, results []checker.Result, summary checker.Summary, + urlFilter *filter.Filter, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, +) error { + report := buildReport(r.env.Now(), files, results, summary, urlFilter, r.renderOptions()) + if effectiveShowStats && perf != nil { + report.Stats = perf.ToJSON() + } + + data, err := r.env.FormatReport(report, output.Format(effectiveFormat)) + if err != nil { + return fmt.Errorf("Error formatting output: %w", err) + } + + _, err = r.io.Out.Write(data) + return err +} + +func (r *checkRunner) handleFileOutputWithStats( + files []string, results []checker.Result, summary checker.Summary, + urlFilter *filter.Filter, perf *stats.Stats, effectiveShowStats bool, +) error { + report := buildReport(r.env.Now(), files, results, summary, urlFilter, r.renderOptions()) + if effectiveShowStats && perf != nil { + report.Stats = perf.ToJSON() + } + + if err := r.env.WriteToFile(report, r.opts.OutputFile); err != nil { + return fmt.Errorf("Error writing file: %w", err) + } + + fmt.Fprintf(r.io.Out, "Wrote report to %s\n", r.opts.OutputFile) + fmt.Fprintf(r.io.Out, "\nSummary: %d alive | %d warnings | %d dead | %d duplicates", + summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) + if urlFilter != nil && urlFilter.IgnoredCount() > 0 { + fmt.Fprintf(r.io.Out, " | %d ignored", urlFilter.IgnoredCount()) + } + fmt.Fprintln(r.io.Out) + + if effectiveShowStats { + fmt.Fprint(r.io.Out, perf.String()) + } + + return nil +} diff --git a/cmd/e2e_bench_test.go b/cmd/e2e_bench_test.go new file mode 100644 index 0000000..1f2df3d --- /dev/null +++ b/cmd/e2e_bench_test.go @@ -0,0 +1,230 @@ +package cmd + +import ( + "io/fs" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/filter" + "github.com/leonardomso/gone/internal/fixer" + "github.com/leonardomso/gone/internal/parser" + "github.com/leonardomso/gone/internal/scanner" +) + +var ( + benchParserLinks []parser.Link + benchCheckerLinks []checker.Link + benchCheckResults []checker.Result + benchFixChanges []fixer.FileChanges + benchPreview string +) + +func BenchmarkPipeline_ScanParseOnly(b *testing.B) { + for _, fixture := range []string{"small", "medium", "large"} { + b.Run(fixture, func(b *testing.B) { + root := benchmarkFixturePath(fixture) + opts := benchmarkScanOptions(root) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + files, err := scanner.FindFilesWithOptions(opts) + if err != nil { + b.Fatal(err) + } + + links, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, false) + if err != nil { + b.Fatal(err) + } + + benchParserLinks = links + } + }) + } +} + +func BenchmarkPipeline_ParseFilterOnly(b *testing.B) { + root := benchmarkFixturePath("duplicate-heavy") + files := mustScanBenchmarkFiles(b, root) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, false) + if err != nil { + b.Fatal(err) + } + + urlFilter, err := filter.New(filter.Config{ + Domains: []string{"ignored.example"}, + }) + if err != nil { + b.Fatal(err) + } + + benchCheckerLinks = FilterParserLinks(parserLinks, urlFilter) + } +} + +func BenchmarkPipeline_FullCheck(b *testing.B) { + server := newBenchmarkHTTPServer() + defer server.Close() + + root := materializeBenchmarkFixture(b, "http-scenarios", map[string]string{ + "{{ALIVE_URL}}": server.URL + "/alive", + "{{REDIRECT_URL}}": server.URL + "/redirect", + "{{DEAD_URL}}": server.URL + "/dead", + "{{ERROR_URL}}": server.URL + "/error", + }) + opts := benchmarkScanOptions(root) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + files, err := scanner.FindFilesWithOptions(opts) + if err != nil { + b.Fatal(err) + } + + parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, false) + if err != nil { + b.Fatal(err) + } + + c := checker.New( + checker.DefaultOptions(). + WithConcurrency(16). + WithTimeout(2 * time.Second). + WithMaxRetries(0), + ) + benchCheckResults = c.CheckAll(ConvertParserLinks(parserLinks)) + } +} + +func BenchmarkPipeline_FixDryRun(b *testing.B) { + server := newBenchmarkHTTPServer() + defer server.Close() + + root := materializeBenchmarkFixture(b, "http-scenarios", map[string]string{ + "{{ALIVE_URL}}": server.URL + "/alive", + "{{REDIRECT_URL}}": server.URL + "/redirect", + "{{DEAD_URL}}": server.URL + "/dead", + "{{ERROR_URL}}": server.URL + "/error", + }) + opts := benchmarkScanOptions(root) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + files, err := scanner.FindFilesWithOptions(opts) + if err != nil { + b.Fatal(err) + } + + parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, false) + if err != nil { + b.Fatal(err) + } + + c := checker.New( + checker.DefaultOptions(). + WithConcurrency(16). + WithTimeout(2 * time.Second). + WithMaxRetries(0), + ) + results := c.CheckAll(ConvertParserLinks(parserLinks)) + + f := fixer.New() + f.SetParserLinks(parserLinks) + benchFixChanges = f.FindFixes(results) + benchPreview = f.Preview(benchFixChanges) + } +} + +func benchmarkFixturePath(name string) string { + return filepath.Join("..", "testdata", "bench", name) +} + +func benchmarkScanOptions(root string) scanner.ScanOptions { + return scanner.ScanOptions{ + Root: root, + Types: []string{"md", "json", "yaml", "toml", "xml"}, + } +} + +func mustScanBenchmarkFiles(b *testing.B, root string) []string { + b.Helper() + + files, err := scanner.FindFilesWithOptions(benchmarkScanOptions(root)) + if err != nil { + b.Fatal(err) + } + return files +} + +func materializeBenchmarkFixture(b *testing.B, name string, replacements map[string]string) string { + b.Helper() + + srcRoot := benchmarkFixturePath(name) + dstRoot := b.TempDir() + + err := filepath.WalkDir(srcRoot, func(path string, d fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + + relPath, err := filepath.Rel(srcRoot, path) + if err != nil { + return err + } + if relPath == "." { + return nil + } + + dstPath := filepath.Join(dstRoot, relPath) + if d.IsDir() { + return os.MkdirAll(dstPath, 0o755) + } + + content, err := os.ReadFile(path) + if err != nil { + return err + } + + rewritten := string(content) + for oldValue, newValue := range replacements { + rewritten = strings.ReplaceAll(rewritten, oldValue, newValue) + } + + return os.WriteFile(dstPath, []byte(rewritten), 0o644) + }) + if err != nil { + b.Fatal(err) + } + + return dstRoot +} + +func newBenchmarkHTTPServer() *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/alive": + w.WriteHeader(http.StatusOK) + case "/redirect": + http.Redirect(w, r, "/alive", http.StatusMovedPermanently) + case "/dead": + w.WriteHeader(http.StatusNotFound) + case "/error": + w.WriteHeader(http.StatusInternalServerError) + default: + w.WriteHeader(http.StatusNotFound) + } + })) +} diff --git a/cmd/fix.go b/cmd/fix.go index 10192a8..3086f1c 100644 --- a/cmd/fix.go +++ b/cmd/fix.go @@ -1,16 +1,9 @@ package cmd import ( - "bufio" - "fmt" "os" - "strings" "github.com/leonardomso/gone/internal/checker" - "github.com/leonardomso/gone/internal/fixer" - "github.com/leonardomso/gone/internal/parser" - "github.com/leonardomso/gone/internal/scanner" - "github.com/leonardomso/gone/internal/stats" "github.com/spf13/cobra" ) @@ -112,302 +105,8 @@ func init() { // runFix is the main entry point for the fix command. // It scans for redirects and applies fixes interactively or automatically. func runFix(_ *cobra.Command, args []string) { - // Initialize stats tracking - perf := stats.New() - - // Load configuration - loadedCfg, err := LoadConfig(fixNoConfig) - if err != nil { - fmt.Fprintf(os.Stderr, "Config error: %v\n", err) - os.Exit(1) - } - - // Determine the path to scan - path := "." - if len(args) > 0 { - path = args[0] - } - - // Get effective file types from config - effectiveTypes := loadedCfg.GetTypes(fixFileTypes, []string{"md"}) - - // Validate file types - supportedTypes := parser.SupportedFileTypes() - supported := make(map[string]bool, len(supportedTypes)) - for _, t := range supportedTypes { - supported[t] = true - } - for _, t := range effectiveTypes { - if !supported[strings.ToLower(t)] { - fmt.Fprintf(os.Stderr, "Error: unsupported file type: %s (supported: %s)\n", - t, strings.Join(supportedTypes, ", ")) - os.Exit(1) - } - } - - // Phase 1: Scan for files with config - perf.StartScan() - scanOpts := loadedCfg.BuildScanOptions(path, fixFileTypes, []string{"md"}) - files, err := scanner.FindFilesWithOptions(scanOpts) - if err != nil { - fmt.Fprintf(os.Stderr, "Error scanning directory: %v\n", err) - os.Exit(1) - } - perf.EndScan(len(files)) - - typeStr := strings.Join(effectiveTypes, ", ") - fmt.Printf("Found %d file(s) of type(s): %s\n", len(files), typeStr) - - // Get effective strict mode - effectiveStrict := loadedCfg.GetStrict(fixStrictMode) - - // Phase 2: Parse links - perf.StartParse() - parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, effectiveStrict) - if err != nil { - fmt.Fprintf(os.Stderr, "Error parsing files: %v\n", err) - os.Exit(1) - } - - // Get effective show stats - effectiveShowStats := loadedCfg.GetShowStats(fixShowStats) - - if len(parserLinks) == 0 { - perf.EndParse(0, 0, 0, 0) - fmt.Println("No links found.") - if effectiveShowStats { - fmt.Print(perf.String()) - } - return - } - - // Load and create filter using config + CLI - urlFilter, err := CreateFilterWithConfig(loadedCfg.Config(), fixIgnoreDomains, fixIgnorePatterns, fixIgnoreRegex) - if err != nil { - fmt.Fprintf(os.Stderr, "Error creating filter: %v\n", err) - os.Exit(1) - } - - // Convert parser.Link to checker.Link, applying filter - links := FilterParserLinks(parserLinks, urlFilter) - - ignoredCount := 0 - if urlFilter != nil { - ignoredCount = urlFilter.IgnoredCount() - } - - // Count unique URLs using shared helper - uniqueURLs := CountUniqueURLs(links) - duplicates := len(links) - uniqueURLs - - perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) - - if len(links) == 0 { - fmt.Println("All links were ignored by filter rules.") - if effectiveShowStats { - fmt.Print(perf.String()) - } - return - } - - fmt.Printf("Checking %d unique URL(s) for redirects...\n", uniqueURLs) - - // Phase 3: Check URLs - perf.StartCheck() - - // Create checker with config values - opts := loadedCfg.BuildCheckerOptions(fixConcurrency, fixTimeout, fixRetries) - - c := checker.New(opts) - results := c.CheckAll(links) - - perf.EndCheck() - - // Create fixer and find fixable items - f := fixer.New() - f.SetParserLinks(parserLinks) - changes := f.FindFixes(results) - - if len(changes) == 0 { - fmt.Println("\nNo fixable redirects found.") - printFixSummary(results) - if effectiveShowStats { - fmt.Print(perf.String()) - } - return - } - - // Show preview - fmt.Println() - fmt.Print(f.Preview(changes)) - - // Handle dry-run mode - if fixDryRun { - fmt.Println("Dry-run mode: no files were modified.") - if effectiveShowStats { - fmt.Print(perf.String()) - } - return - } - - // Handle automatic mode - if fixYes { - applyAllFixes(f, changes) - if effectiveShowStats { - fmt.Print(perf.String()) - } - return - } - - // Interactive mode - runInteractiveFix(f, changes) - if effectiveShowStats { - fmt.Print(perf.String()) - } -} - -// applyAllFixes applies all fixes without prompting. -func applyAllFixes(f *fixer.Fixer, changes []fixer.FileChanges) { - results := f.ApplyAll(changes) - fmt.Println(fixer.DetailedSummary(results)) -} - -// runInteractiveFix prompts the user for each file before applying fixes. -func runInteractiveFix(f *fixer.Fixer, changes []fixer.FileChanges) { - reader := bufio.NewReader(os.Stdin) - var allResults []fixer.FixResult - applyAll := false - - for i := 0; i < len(changes); i++ { - fc := changes[i] - - if applyAll { - result, _ := f.ApplyToFile(fc) - allResults = append(allResults, *result) - continue - } - - // Prompt for this file - fmt.Printf("\nFix %s? (%d change(s)) [y/n/a/q/?] ", - fc.FilePath, fc.TotalFixes) - - input, err := reader.ReadString('\n') - if err != nil { - fmt.Fprintf(os.Stderr, "\nError reading input: %v\n", err) - os.Exit(1) - } - - input = strings.TrimSpace(strings.ToLower(input)) - - switch input { - case "y", "yes": - result, applyErr := f.ApplyToFile(fc) - if applyErr != nil { - fmt.Fprintf(os.Stderr, "Error: %v\n", applyErr) - } else { - fmt.Printf("Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) - } - allResults = append(allResults, *result) - - case "n", "no": - fmt.Printf("Skipped %s\n", fc.FilePath) - allResults = append(allResults, fixer.FixResult{ - FilePath: fc.FilePath, - Skipped: fc.TotalFixes, - }) - - case "a", "all": - // Apply this file and all remaining - result, applyErr := f.ApplyToFile(fc) - if applyErr != nil { - fmt.Fprintf(os.Stderr, "Error: %v\n", applyErr) - } else { - fmt.Printf("Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) - } - allResults = append(allResults, *result) - applyAll = true - - case "q", "quit": - fmt.Println("\nQuitting. Remaining files were not modified.") - // Add remaining as skipped - for j := i; j < len(changes); j++ { - allResults = append(allResults, fixer.FixResult{ - FilePath: changes[j].FilePath, - Skipped: changes[j].TotalFixes, - }) - } - printInteractiveResults(allResults) - os.Exit(2) - - case "?", "help": - printInteractiveHelp() - i-- // Re-prompt for this file - - default: - fmt.Println("Invalid input. Use y/n/a/q/? (or type 'help')") - i-- // Retry this file - } - } - - fmt.Println() - printInteractiveResults(allResults) -} - -// printInteractiveHelp displays help for interactive mode options. -func printInteractiveHelp() { - fmt.Println(` -Interactive mode options: - y, yes - Fix this file - n, no - Skip this file - a, all - Fix this file and all remaining files - q, quit - Quit without fixing remaining files - ?, help - Show this help`) -} - -// printInteractiveResults displays a summary of the interactive session. -func printInteractiveResults(results []fixer.FixResult) { - applied := 0 - skipped := 0 - filesModified := 0 - filesSkipped := 0 - - for _, r := range results { - applied += r.Applied - skipped += r.Skipped - if r.Applied > 0 { - filesModified++ - } - if r.Skipped > 0 && r.Applied == 0 { - filesSkipped++ - } - } - - if applied > 0 { - fmt.Printf("Fixed %d redirect(s) across %d file(s).\n", applied, filesModified) - } - if filesSkipped > 0 { - fmt.Printf("Skipped %d file(s).\n", filesSkipped) - } -} - -// printFixSummary displays a summary of check results for context. -func printFixSummary(results []checker.Result) { - summary := checker.Summarize(results) - - fmt.Printf("\nLink status: %d alive | %d redirects | %d dead | %d errors\n", - summary.Alive, summary.Redirects, summary.Dead, summary.Errors) - - if summary.Redirects > 0 { - // Count how many redirects are not fixable (final status != 200) - notFixable := 0 - for _, r := range results { - if r.Status == checker.StatusRedirect && r.FinalStatus != 200 { - notFixable++ - } - } - if notFixable > 0 { - fmt.Printf("Note: %d redirect(s) lead to non-200 responses and cannot be auto-fixed.\n", - notFixable) - } + code := newFixRunner(currentFixOptions(), defaultCommandEnv(), defaultIOStreams()).Run(args) + if code != 0 { + os.Exit(code) } } diff --git a/cmd/fix_runner.go b/cmd/fix_runner.go new file mode 100644 index 0000000..dedc111 --- /dev/null +++ b/cmd/fix_runner.go @@ -0,0 +1,292 @@ +package cmd + +import ( + "bufio" + "fmt" + "io" + "strings" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/fixer" +) + +type FixOptions struct { + Yes bool + DryRun bool + Concurrency int + Timeout int + Retries int + ShowStats bool + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + NoConfig bool +} + +type fixRunner struct { + opts FixOptions + env CommandEnv + io IOStreams +} + +func newFixRunner(opts FixOptions, env CommandEnv, streams IOStreams) *fixRunner { + return &fixRunner{ + opts: opts, + env: env, + io: streams, + } +} + +func currentFixOptions() FixOptions { + return FixOptions{ + Yes: fixYes, + DryRun: fixDryRun, + Concurrency: fixConcurrency, + Timeout: fixTimeout, + Retries: fixRetries, + ShowStats: fixShowStats, + FileTypes: append([]string{}, fixFileTypes...), + StrictMode: fixStrictMode, + IgnoreDomains: append([]string{}, fixIgnoreDomains...), + IgnorePatterns: append([]string{}, fixIgnorePatterns...), + IgnoreRegex: append([]string{}, fixIgnoreRegex...), + NoConfig: fixNoConfig, + } +} + +func (r *fixRunner) Run(args []string) int { + perf := r.env.NewStats() + + loadedCfg, err := r.env.LoadConfig(r.opts.NoConfig) + if err != nil { + fmt.Fprintf(r.io.ErrOut, "Config error: %v\n", err) + return 1 + } + + path := getPathArg(args) + effectiveTypes := loadedCfg.GetTypes(r.opts.FileTypes, []string{"md"}) + if err := validateFileTypes(effectiveTypes); err != nil { + fmt.Fprintf(r.io.ErrOut, "Error: %v\n", err) + return 1 + } + + perf.StartScan() + files, err := r.env.FindFiles(loadedCfg.BuildScanOptions(path, r.opts.FileTypes, []string{"md"})) + if err != nil { + fmt.Fprintf(r.io.ErrOut, "Error scanning directory: %v\n", err) + return 1 + } + perf.EndScan(len(files)) + + fmt.Fprintf(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) + + perf.StartParse() + parserLinks, err := r.env.ExtractLinks(files, loadedCfg.GetStrict(r.opts.StrictMode)) + if err != nil { + fmt.Fprintf(r.io.ErrOut, "Error parsing files: %v\n", err) + return 1 + } + + if len(parserLinks) == 0 { + perf.EndParse(0, 0, 0, 0) + fmt.Fprintln(r.io.Out, "No links found.") + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return 0 + } + + urlFilter, err := r.env.CreateFilterWithConfig(loadedCfg.Config(), r.opts.IgnoreDomains, r.opts.IgnorePatterns, r.opts.IgnoreRegex) + if err != nil { + fmt.Fprintf(r.io.ErrOut, "Error creating filter: %v\n", err) + return 1 + } + + links := FilterParserLinks(parserLinks, urlFilter) + ignoredCount := getIgnoredCount(urlFilter) + uniqueURLs := CountUniqueURLs(links) + duplicates := len(links) - uniqueURLs + perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) + + if len(links) == 0 { + fmt.Fprintln(r.io.Out, "All links were ignored by filter rules.") + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return 0 + } + + fmt.Fprintf(r.io.Out, "Checking %d unique URL(s) for redirects...\n", uniqueURLs) + + perf.StartCheck() + results := r.env.NewChecker(loadedCfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries)).CheckAll(links) + perf.EndCheck() + + f := r.env.NewFixer() + f.SetParserLinks(parserLinks) + changes := f.FindFixes(results) + + if len(changes) == 0 { + fmt.Fprintln(r.io.Out, "\nNo fixable redirects found.") + printFixSummary(r.io.Out, results) + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return 0 + } + + fmt.Fprintln(r.io.Out) + fmt.Fprint(r.io.Out, f.Preview(changes)) + + if r.opts.DryRun { + fmt.Fprintln(r.io.Out, "Dry-run mode: no files were modified.") + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return 0 + } + + if r.opts.Yes { + applyAllFixes(r.io.Out, f, changes) + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return 0 + } + + code := runInteractiveFix(r.io, f, changes) + if loadedCfg.GetShowStats(r.opts.ShowStats) { + fmt.Fprint(r.io.Out, perf.String()) + } + return code +} + +func applyAllFixes(out io.Writer, f RedirectFixer, changes []fixer.FileChanges) { + results := f.ApplyAll(changes) + fmt.Fprintln(out, fixer.DetailedSummary(results)) +} + +func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileChanges) int { + reader := bufio.NewReader(streams.In) + var allResults []fixer.FixResult + applyAll := false + + for i := 0; i < len(changes); i++ { + fc := changes[i] + + if applyAll { + result, _ := f.ApplyToFile(fc) + allResults = append(allResults, *result) + continue + } + + fmt.Fprintf(streams.Out, "\nFix %s? (%d change(s)) [y/n/a/q/?] ", fc.FilePath, fc.TotalFixes) + + input, err := reader.ReadString('\n') + if err != nil { + fmt.Fprintf(streams.ErrOut, "\nError reading input: %v\n", err) + return 1 + } + + switch strings.TrimSpace(strings.ToLower(input)) { + case "y", "yes": + result, applyErr := f.ApplyToFile(fc) + if applyErr != nil { + fmt.Fprintf(streams.ErrOut, "Error: %v\n", applyErr) + } else { + fmt.Fprintf(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) + } + allResults = append(allResults, *result) + case "n", "no": + fmt.Fprintf(streams.Out, "Skipped %s\n", fc.FilePath) + allResults = append(allResults, fixer.FixResult{ + FilePath: fc.FilePath, + Skipped: fc.TotalFixes, + }) + case "a", "all": + result, applyErr := f.ApplyToFile(fc) + if applyErr != nil { + fmt.Fprintf(streams.ErrOut, "Error: %v\n", applyErr) + } else { + fmt.Fprintf(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) + } + allResults = append(allResults, *result) + applyAll = true + case "q", "quit": + fmt.Fprintln(streams.Out, "\nQuitting. Remaining files were not modified.") + for j := i; j < len(changes); j++ { + allResults = append(allResults, fixer.FixResult{ + FilePath: changes[j].FilePath, + Skipped: changes[j].TotalFixes, + }) + } + printInteractiveResults(streams.Out, allResults) + return 2 + case "?", "help", "h": + printInteractiveHelp(streams.Out) + i-- + default: + fmt.Fprintln(streams.Out, "Invalid input. Use y/n/a/q/? (or type 'help')") + i-- + } + } + + fmt.Fprintln(streams.Out) + printInteractiveResults(streams.Out, allResults) + return 0 +} + +func printInteractiveHelp(out io.Writer) { + fmt.Fprintln(out, ` +Interactive mode options: + y, yes - Fix this file + n, no - Skip this file + a, all - Fix this file and all remaining files + q, quit - Quit without fixing remaining files + ?, help - Show this help`) +} + +func printInteractiveResults(out io.Writer, results []fixer.FixResult) { + applied := 0 + filesModified := 0 + filesSkipped := 0 + + for _, r := range results { + applied += r.Applied + if r.Applied > 0 { + filesModified++ + } + if r.Skipped > 0 && r.Applied == 0 { + filesSkipped++ + } + } + + if applied > 0 { + fmt.Fprintf(out, "Fixed %d redirect(s) across %d file(s).\n", applied, filesModified) + } + if filesSkipped > 0 { + fmt.Fprintf(out, "Skipped %d file(s).\n", filesSkipped) + } +} + +func printFixSummary(out io.Writer, results []checker.Result) { + summary := checker.Summarize(results) + fmt.Fprintf(out, "\nLink status: %d alive | %d redirects | %d dead | %d errors\n", + summary.Alive, summary.Redirects, summary.Dead, summary.Errors) + + if summary.Redirects == 0 { + return + } + + notFixable := 0 + for _, r := range results { + if r.Status == checker.StatusRedirect && r.FinalStatus != 200 { + notFixable++ + } + } + if notFixable > 0 { + fmt.Fprintf(out, "Note: %d redirect(s) lead to non-200 responses and cannot be auto-fixed.\n", notFixable) + } +} diff --git a/cmd/helpers_test.go b/cmd/helpers_test.go index 1e1cd7e..7e598f0 100644 --- a/cmd/helpers_test.go +++ b/cmd/helpers_test.go @@ -241,7 +241,7 @@ func TestFilterResultHelpers(t *testing.T) { assert.Len(t, FilterResultsDead(results), 2) assert.Len(t, FilterResultsDuplicates(results), 1) assert.Len(t, FilterResultsAlive(results), 1) - assert.Equal(t, 5, len(filterResults(results))) + assert.Equal(t, 5, len(filterResults(results, checkRenderOptions{ShowWarnings: false}))) } func TestCheckHelpersAndReportBuilding(t *testing.T) { @@ -252,8 +252,8 @@ func TestCheckHelpersAndReportBuilding(t *testing.T) { outputFile = "" showIgnored = true showWarnings = true - - assert.NoError(t, validateCheckFlags()) + runner := newCheckRunner(currentCheckOptions(), defaultCommandEnv(), IOStreams{}) + assert.NoError(t, runner.validateFlags()) assert.Equal(t, ".", getPathArg(nil)) assert.Equal(t, "docs", getPathArg([]string{"docs"})) assert.NoError(t, validateFileTypes([]string{"md", "json", "yaml", "toml", "xml"})) @@ -285,18 +285,19 @@ func TestCheckHelpersAndReportBuilding(t *testing.T) { require.True(t, urlFilter.ShouldIgnore("https://ignored.example/path", "README.md", 7)) assert.Equal(t, 1, getIgnoredCount(urlFilter)) - report := buildReport([]string{"README.md"}, results, summary, urlFilter) + report := buildReport(time.Unix(123, 0), []string{"README.md"}, results, summary, urlFilter, runner.renderOptions()) require.NotNil(t, report) assert.Equal(t, summary.Total+1, report.TotalLinks) require.Len(t, report.Ignored, 1) assert.Equal(t, "https://ignored.example/path", report.Ignored[0].URL) assert.Equal(t, "301 → 200", formatRedirectChain(results[0])) - assert.Equal(t, "No warnings found.", getEmptyResultsMessage(checker.Summary{Alive: 2})) + assert.Equal(t, "No warnings found.", getEmptyResultsMessage(checker.Summary{Alive: 2}, runner.renderOptions())) showWarnings = false showDead = true - assert.Equal(t, "No dead links found.", getEmptyResultsMessage(checker.Summary{Alive: 2})) + runner = newCheckRunner(currentCheckOptions(), defaultCommandEnv(), IOStreams{}) + assert.Equal(t, "No dead links found.", getEmptyResultsMessage(checker.Summary{Alive: 2}, runner.renderOptions())) } func TestValidateCheckFlags_InvalidCombinations(t *testing.T) { @@ -305,13 +306,13 @@ func TestValidateCheckFlags_InvalidCombinations(t *testing.T) { outputFormat = "json" outputFile = "report.json" - err := validateCheckFlags() + err := newCheckRunner(currentCheckOptions(), defaultCommandEnv(), IOStreams{}).validateFlags() require.Error(t, err) assert.Contains(t, err.Error(), "mutually exclusive") outputFile = "" outputFormat = "invalid" - err = validateCheckFlags() + err = newCheckRunner(currentCheckOptions(), defaultCommandEnv(), IOStreams{}).validateFlags() require.Error(t, err) assert.True(t, strings.Contains(err.Error(), "invalid format")) } diff --git a/cmd/integration_test.go b/cmd/integration_test.go index 6ec94e2..5bf5c34 100644 --- a/cmd/integration_test.go +++ b/cmd/integration_test.go @@ -75,6 +75,79 @@ ignore: assert.Equal(t, "https://ignored.example/path", payload.Ignored[0].URL) } +func TestCheck_WritesOutputFile(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(tmpDir, "README.md"), + []byte("[alive]("+server.URL+")\n"), + 0o644, + )) + + reportPath := filepath.Join(tmpDir, "report.json") + result := runGone(t, tmpDir, "check", ".", "--output", reportPath, "--no-config") + require.Equal(t, 0, result.exitCode, result.stderr) + assert.Contains(t, result.stdout, "Wrote report to") + + data, err := os.ReadFile(reportPath) + require.NoError(t, err) + + var payload map[string]any + require.NoError(t, json.Unmarshal(data, &payload)) + assert.Equal(t, float64(1), payload["total_links"]) + assert.Equal(t, float64(1), payload["unique_urls"]) +} + +func TestCheck_CLIFormatOverridesConfigFormat(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, ".gonerc.yaml"), []byte("output:\n format: yaml\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "README.md"), []byte("# No links here\n"), 0o644)) + + result := runGone(t, tmpDir, "check", ".", "--format=json") + require.Equal(t, 0, result.exitCode, result.stderr) + require.NotEmpty(t, strings.TrimSpace(result.stdout)) + + var payload map[string]any + require.NoError(t, json.Unmarshal([]byte(result.stdout), &payload)) + assert.Equal(t, float64(1), payload["total_files"]) +} + +func TestCheck_StrictModeParseFailure(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "broken.json"), []byte("{not-json"), 0o644)) + + result := runGone(t, tmpDir, "check", ".", "--types=json", "--strict", "--no-config") + require.Equal(t, 1, result.exitCode) + assert.Contains(t, result.stderr, "Error parsing files") + assert.Contains(t, result.stderr, "broken.json") +} + +func TestCheck_TextOutput_WhenAllLinksIgnored(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile( + filepath.Join(tmpDir, "README.md"), + []byte("[ignored](https://ignored.example/path)\n"), + 0o644, + )) + + result := runGone(t, tmpDir, "check", ".", "--ignore-domain=ignored.example", "--show-ignored", "--no-config") + require.Equal(t, 0, result.exitCode, result.stderr) + assert.Contains(t, result.stdout, "All links were ignored by filter rules.") + assert.Contains(t, result.stdout, "=== Ignored URLs (1) ===") +} + func TestFix_Yes_UpdatesRedirectsAcrossFileTypes(t *testing.T) { t.Parallel() @@ -122,6 +195,40 @@ func TestFix_Yes_UpdatesRedirectsAcrossFileTypes(t *testing.T) { assert.Contains(t, string(jsonContent), finalURL) } +func TestFix_InteractiveScriptedInput(t *testing.T) { + t.Parallel() + + finalURL := "" + redirectServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/old": + http.Redirect(w, r, finalURL, http.StatusMovedPermanently) + case "/new": + w.WriteHeader(http.StatusOK) + default: + w.WriteHeader(http.StatusNotFound) + } + })) + defer redirectServer.Close() + + finalURL = redirectServer.URL + "/new" + oldURL := redirectServer.URL + "/old" + + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "README.md") + require.NoError(t, os.WriteFile(filePath, []byte("[docs]("+oldURL+")\n"), 0o644)) + + result := runGoneWithInput(t, tmpDir, "?\ny\n", "fix", ".", "--types=md", "--no-config") + require.Equal(t, 0, result.exitCode, result.stderr) + assert.Contains(t, result.stdout, "Interactive mode options:") + assert.Contains(t, result.stdout, "Fixed 1 redirect(s) in README.md") + + content, err := os.ReadFile(filePath) + require.NoError(t, err) + assert.NotContains(t, string(content), oldURL) + assert.Contains(t, string(content), finalURL) +} + type cliResult struct { stdout string stderr string @@ -135,6 +242,22 @@ func runGone(t *testing.T, dir string, args ...string) cliResult { cmd := exec.Command(binaryPath, args...) cmd.Dir = dir output, err := cmd.CombinedOutput() + return cliResultFromOutput(t, output, err) +} + +func runGoneWithInput(t *testing.T, dir, input string, args ...string) cliResult { + t.Helper() + + binaryPath := buildGoneBinary(t) + cmd := exec.Command(binaryPath, args...) + cmd.Dir = dir + cmd.Stdin = strings.NewReader(input) + output, err := cmd.CombinedOutput() + return cliResultFromOutput(t, output, err) +} + +func cliResultFromOutput(t *testing.T, output []byte, err error) cliResult { + t.Helper() result := cliResult{ stdout: string(output), diff --git a/cmd/runner_test.go b/cmd/runner_test.go new file mode 100644 index 0000000..159dc80 --- /dev/null +++ b/cmd/runner_test.go @@ -0,0 +1,373 @@ +package cmd + +import ( + "bytes" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/config" + "github.com/leonardomso/gone/internal/filter" + "github.com/leonardomso/gone/internal/fixer" + "github.com/leonardomso/gone/internal/output" + "github.com/leonardomso/gone/internal/parser" + "github.com/leonardomso/gone/internal/scanner" + "github.com/leonardomso/gone/internal/stats" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type stubChecker struct { + results []checker.Result +} + +func (s stubChecker) CheckAll([]checker.Link) []checker.Result { + return append([]checker.Result(nil), s.results...) +} + +type stubFixer struct { + preview string + changes []fixer.FileChanges + applyAll []fixer.FixResult + applyToFile map[string]fixer.FixResult +} + +func (*stubFixer) SetParserLinks([]parser.Link) {} + +func (s *stubFixer) FindFixes([]checker.Result) []fixer.FileChanges { + return append([]fixer.FileChanges(nil), s.changes...) +} + +func (s *stubFixer) Preview([]fixer.FileChanges) string { + return s.preview +} + +func (s *stubFixer) ApplyAll([]fixer.FileChanges) []fixer.FixResult { + return append([]fixer.FixResult(nil), s.applyAll...) +} + +func (s *stubFixer) ApplyToFile(fc fixer.FileChanges) (*fixer.FixResult, error) { + result := s.applyToFile[fc.FilePath] + return &result, result.Error +} + +func testCommandEnv() CommandEnv { + return CommandEnv{ + LoadConfig: func(bool) (*LoadedConfig, error) { + return &LoadedConfig{cfg: &config.Config{}}, nil + }, + FindFiles: func(scanner.ScanOptions) ([]string, error) { + return []string{"README.md"}, nil + }, + ExtractLinks: func([]string, bool) ([]parser.Link, error) { + return nil, nil + }, + CreateFilterWithConfig: CreateFilterWithConfig, + NewChecker: func(checker.Options) LinkChecker { + return stubChecker{} + }, + NewFixer: func() RedirectFixer { + return &stubFixer{} + }, + FormatReport: output.FormatReport, + WriteToFile: output.WriteToFile, + NewStats: stats.New, + Now: func() time.Time { + return time.Unix(123, 0) + }, + } +} + +func TestCheckRunner_Run_NoLinksStructuredOutput(t *testing.T) { + t.Parallel() + + var stdout, stderr bytes.Buffer + env := testCommandEnv() + env.LoadConfig = func(bool) (*LoadedConfig, error) { + return &LoadedConfig{cfg: &config.Config{ + Output: config.OutputConfig{Format: "json"}, + }}, nil + } + + runner := newCheckRunner(CheckOptions{FileTypes: []string{"md"}}, env, IOStreams{ + Out: &stdout, + ErrOut: &stderr, + }) + + code := runner.Run(nil) + require.Equal(t, 0, code) + assert.Empty(t, stderr.String()) + assert.Contains(t, stdout.String(), `"total_links": 0`) + assert.Contains(t, stdout.String(), `"total_files": 1`) +} + +func TestCheckRunner_Run_AllLinksIgnoredTextOutput(t *testing.T) { + t.Parallel() + + var stdout, stderr bytes.Buffer + env := testCommandEnv() + env.ExtractLinks = func([]string, bool) ([]parser.Link, error) { + return []parser.Link{{URL: "https://ignored.example/path", FilePath: "README.md", Line: 3}}, nil + } + + runner := newCheckRunner(CheckOptions{ + FileTypes: []string{"md"}, + ShowIgnored: true, + IgnoreDomains: []string{"ignored.example"}, + }, env, IOStreams{ + Out: &stdout, + ErrOut: &stderr, + }) + + code := runner.Run(nil) + require.Equal(t, 0, code) + assert.Empty(t, stderr.String()) + assert.Contains(t, stdout.String(), "All links were ignored by filter rules.") + assert.Contains(t, stdout.String(), "=== Ignored URLs (1) ===") +} + +func TestCheckRunner_Run_FileOutputSummary(t *testing.T) { + t.Parallel() + + var stdout, stderr bytes.Buffer + env := testCommandEnv() + env.ExtractLinks = func([]string, bool) ([]parser.Link, error) { + return []parser.Link{{URL: "https://alive.example", FilePath: "README.md", Line: 2}}, nil + } + env.NewChecker = func(checker.Options) LinkChecker { + return stubChecker{ + results: []checker.Result{{ + Link: checker.Link{URL: "https://alive.example", FilePath: "README.md", Line: 2}, + Status: checker.StatusAlive, + StatusCode: 200, + }}, + } + } + + outputPath := filepath.Join(t.TempDir(), "report.json") + runner := newCheckRunner(CheckOptions{ + FileTypes: []string{"md"}, + OutputFile: outputPath, + }, env, IOStreams{ + Out: &stdout, + ErrOut: &stderr, + }) + + code := runner.Run(nil) + require.Equal(t, 0, code) + assert.Empty(t, stderr.String()) + assert.Contains(t, stdout.String(), "Wrote report to") + assert.Contains(t, stdout.String(), "Summary: 1 alive | 0 warnings | 0 dead | 0 duplicates") +} + +func TestCheckRunner_Run_DeadLinksReturnExitCodeOne(t *testing.T) { + t.Parallel() + + var stdout, stderr bytes.Buffer + env := testCommandEnv() + env.ExtractLinks = func([]string, bool) ([]parser.Link, error) { + return []parser.Link{{URL: "https://dead.example", FilePath: "README.md", Line: 2}}, nil + } + env.NewChecker = func(checker.Options) LinkChecker { + return stubChecker{ + results: []checker.Result{{ + Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, + Status: checker.StatusDead, + StatusCode: 404, + }}, + } + } + + runner := newCheckRunner(CheckOptions{ + FileTypes: []string{"md"}, + }, env, IOStreams{ + Out: &stdout, + ErrOut: &stderr, + }) + + code := runner.Run(nil) + assert.Equal(t, 1, code) + assert.Empty(t, stderr.String()) + assert.Contains(t, stdout.String(), "Summary:") +} + +func TestOutputTextAndPrintHelpers(t *testing.T) { + t.Parallel() + + var outputBuf bytes.Buffer + primary := checker.Result{ + Link: checker.Link{URL: "https://redirect.example", FilePath: "README.md", Line: 1, Text: "Redirect"}, + Status: checker.StatusRedirect, + StatusCode: 301, + FinalStatus: 200, + FinalURL: "https://final.example", + RedirectChain: []checker.Redirect{{URL: "https://redirect.example", StatusCode: 301}}, + } + results := []checker.Result{ + primary, + {Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, Status: checker.StatusDead, StatusCode: 404}, + {Link: checker.Link{URL: "https://duplicate.example", FilePath: "README.md", Line: 3}, Status: checker.StatusDuplicate, DuplicateOf: &primary}, + {Link: checker.Link{URL: "https://alive.example", FilePath: "README.md", Line: 4}, Status: checker.StatusAlive, StatusCode: 200}, + } + + urlFilter, err := filter.New(filter.Config{Domains: []string{"ignored.example"}}) + require.NoError(t, err) + require.True(t, urlFilter.ShouldIgnore("https://ignored.example/path", "README.md", 8)) + + opts := checkRenderOptions{ShowAll: true, ShowIgnored: true} + outputText(&outputBuf, results, checker.Summarize(results), urlFilter, opts) + + assert.Contains(t, outputBuf.String(), "=== Warnings (1) ===") + assert.Contains(t, outputBuf.String(), "=== Dead Links (1) ===") + assert.Contains(t, outputBuf.String(), "=== Duplicates (1) ===") + assert.Contains(t, outputBuf.String(), "=== Alive (1) ===") + assert.Contains(t, outputBuf.String(), "=== Ignored URLs (1) ===") + + outputBuf.Reset() + printProgressMessage(&outputBuf, 5, 4, 3, 2, 1) + assert.Contains(t, outputBuf.String(), "Found 5 link(s), checking 3 unique URLs (2 duplicates, 1 ignored)") +} + +func TestOutputText_GroupedVsFlat(t *testing.T) { + t.Parallel() + + results := []checker.Result{ + {Link: checker.Link{URL: "https://redirect.example", FilePath: "README.md", Line: 1}, Status: checker.StatusRedirect, StatusCode: 301, FinalStatus: 200, FinalURL: "https://final.example"}, + {Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, Status: checker.StatusDead, StatusCode: 404}, + } + summary := checker.Summarize(results) + + var grouped bytes.Buffer + outputText(&grouped, results, summary, nil, checkRenderOptions{}) + assert.Contains(t, grouped.String(), "=== Warnings (1) ===") + assert.Contains(t, grouped.String(), "=== Dead Links (1) ===") + + var flat bytes.Buffer + outputText(&flat, results, summary, nil, checkRenderOptions{ShowDead: true}) + assert.NotContains(t, flat.String(), "=== Warnings") + assert.NotContains(t, flat.String(), "=== Dead Links") + assert.Contains(t, flat.String(), "[404]") +} + +func TestGetEmptyResultsMessage_Variants(t *testing.T) { + t.Parallel() + + assert.Equal(t, "No alive links found.", getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowAlive: true})) + assert.Equal(t, "No warnings found.", getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowWarnings: true})) + assert.Equal(t, "No dead links found.", getEmptyResultsMessage(checker.Summary{Redirects: 1}, checkRenderOptions{ShowDead: true})) + assert.Equal(t, "All links are alive!", getEmptyResultsMessage(checker.Summary{Alive: 3}, checkRenderOptions{})) +} + +func TestRunInteractiveFix_Branches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + changes []fixer.FileChanges + expectedCode int + expectOut []string + }{ + { + name: "help then no", + input: "?\nn\n", + changes: []fixer.FileChanges{{FilePath: "README.md", TotalFixes: 1}}, + expectedCode: 0, + expectOut: []string{"Interactive mode options:", "Skipped README.md", "Skipped 1 file(s)."}, + }, + { + name: "invalid then yes", + input: "oops\ny\n", + changes: []fixer.FileChanges{{FilePath: "README.md", TotalFixes: 1}}, + expectedCode: 0, + expectOut: []string{"Invalid input.", "Fixed 1 redirect(s) in README.md", "Fixed 1 redirect(s) across 1 file(s)."}, + }, + { + name: "quit", + input: "q\n", + changes: []fixer.FileChanges{ + {FilePath: "README.md", TotalFixes: 1}, + {FilePath: "docs.md", TotalFixes: 1}, + }, + expectedCode: 2, + expectOut: []string{"Quitting. Remaining files were not modified.", "Skipped 2 file(s)."}, + }, + { + name: "all", + input: "a\n", + changes: []fixer.FileChanges{ + {FilePath: "README.md", TotalFixes: 1}, + {FilePath: "docs.md", TotalFixes: 1}, + }, + expectedCode: 0, + expectOut: []string{"Fixed 1 redirect(s) in README.md", "Fixed 2 redirect(s) across 2 file(s)."}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var stdout, stderr bytes.Buffer + f := &stubFixer{ + applyToFile: map[string]fixer.FixResult{ + "README.md": {FilePath: "README.md", Applied: 1}, + "docs.md": {FilePath: "docs.md", Applied: 1}, + }, + } + + code := runInteractiveFix(IOStreams{ + In: strings.NewReader(tt.input), + Out: &stdout, + ErrOut: &stderr, + }, f, tt.changes) + + assert.Equal(t, tt.expectedCode, code) + assert.Empty(t, stderr.String()) + for _, expected := range tt.expectOut { + assert.Contains(t, stdout.String(), expected) + } + }) + } +} + +func TestFixRunner_Run_DryRun(t *testing.T) { + t.Parallel() + + var stdout, stderr bytes.Buffer + env := testCommandEnv() + env.ExtractLinks = func([]string, bool) ([]parser.Link, error) { + return []parser.Link{{URL: "https://old.example", FilePath: "README.md", Line: 2}}, nil + } + env.NewChecker = func(checker.Options) LinkChecker { + return stubChecker{ + results: []checker.Result{{ + Link: checker.Link{URL: "https://old.example", FilePath: "README.md", Line: 2}, + Status: checker.StatusRedirect, + StatusCode: 301, + FinalStatus: 200, + FinalURL: "https://new.example", + }}, + } + } + env.NewFixer = func() RedirectFixer { + return &stubFixer{ + preview: "preview output\n", + changes: []fixer.FileChanges{{FilePath: "README.md", TotalFixes: 1}}, + } + } + + runner := newFixRunner(FixOptions{ + FileTypes: []string{"md"}, + DryRun: true, + }, env, IOStreams{ + Out: &stdout, + ErrOut: &stderr, + }) + + code := runner.Run(nil) + require.Equal(t, 0, code) + assert.Empty(t, stderr.String()) + assert.Contains(t, stdout.String(), "preview output") + assert.Contains(t, stdout.String(), "Dry-run mode: no files were modified.") +} diff --git a/cmd/runtime.go b/cmd/runtime.go new file mode 100644 index 0000000..cc59442 --- /dev/null +++ b/cmd/runtime.go @@ -0,0 +1,78 @@ +package cmd + +import ( + "io" + "os" + "time" + + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/config" + "github.com/leonardomso/gone/internal/filter" + "github.com/leonardomso/gone/internal/fixer" + "github.com/leonardomso/gone/internal/output" + "github.com/leonardomso/gone/internal/parser" + "github.com/leonardomso/gone/internal/scanner" + "github.com/leonardomso/gone/internal/stats" +) + +// IOStreams groups the command IO streams to make runners testable. +type IOStreams struct { + In io.Reader + Out io.Writer + ErrOut io.Writer +} + +func defaultIOStreams() IOStreams { + return IOStreams{ + In: os.Stdin, + Out: os.Stdout, + ErrOut: os.Stderr, + } +} + +// LinkChecker is the checker dependency used by command runners. +type LinkChecker interface { + CheckAll([]checker.Link) []checker.Result +} + +// RedirectFixer is the fixer dependency used by command runners. +type RedirectFixer interface { + SetParserLinks([]parser.Link) + FindFixes([]checker.Result) []fixer.FileChanges + Preview([]fixer.FileChanges) string + ApplyAll([]fixer.FileChanges) []fixer.FixResult + ApplyToFile(fixer.FileChanges) (*fixer.FixResult, error) +} + +// CommandEnv groups command-side dependencies so runners can be tested directly. +type CommandEnv struct { + LoadConfig func(bool) (*LoadedConfig, error) + FindFiles func(scanner.ScanOptions) ([]string, error) + ExtractLinks func([]string, bool) ([]parser.Link, error) + CreateFilterWithConfig func(*config.Config, []string, []string, []string) (*filter.Filter, error) + NewChecker func(checker.Options) LinkChecker + NewFixer func() RedirectFixer + FormatReport func(*output.Report, output.Format) ([]byte, error) + WriteToFile func(*output.Report, string) error + NewStats func() *stats.Stats + Now func() time.Time +} + +func defaultCommandEnv() CommandEnv { + return CommandEnv{ + LoadConfig: LoadConfig, + FindFiles: scanner.FindFilesWithOptions, + ExtractLinks: parser.ExtractLinksFromMultipleFilesWithRegistry, + CreateFilterWithConfig: CreateFilterWithConfig, + NewChecker: func(opts checker.Options) LinkChecker { + return checker.New(opts) + }, + NewFixer: func() RedirectFixer { + return fixer.New() + }, + FormatReport: output.FormatReport, + WriteToFile: output.WriteToFile, + NewStats: stats.New, + Now: time.Now, + } +} diff --git a/go.mod b/go.mod index 36c1481..f19461d 100644 --- a/go.mod +++ b/go.mod @@ -3,19 +3,21 @@ module github.com/leonardomso/gone go 1.25.5 require ( + github.com/BurntSushi/toml v1.6.0 github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 github.com/gobwas/glob v0.2.3 - github.com/pelletier/go-toml/v2 v2.2.4 + github.com/samber/lo v1.53.0 github.com/spf13/cobra v1.10.2 github.com/stretchr/testify v1.11.1 github.com/yuin/goldmark v1.7.13 + go.uber.org/goleak v1.3.0 gopkg.in/yaml.v3 v3.0.1 ) require ( - github.com/BurntSushi/toml v1.6.0 // indirect + github.com/alitto/pond/v2 v2.7.0 // indirect github.com/atotto/clipboard v0.1.4 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect @@ -25,6 +27,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kr/text v0.2.0 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-localereader v0.0.1 // indirect @@ -35,8 +38,9 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sahilm/fuzzy v0.1.1 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/sys v0.36.0 // indirect - golang.org/x/text v0.3.8 // indirect + golang.org/x/text v0.22.0 // indirect ) diff --git a/go.sum b/go.sum index 2406b48..d271e47 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/alitto/pond/v2 v2.7.0 h1:c76L+yN916m/DRXjGCeUBHHu92uWnh/g1bwVk4zyyXg= +github.com/alitto/pond/v2 v2.7.0/go.mod h1:xkjYEgQ05RSpWdfSd1nM3OVv7TBhLdy7rMp3+2Nq+yE= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= @@ -23,6 +25,7 @@ github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= @@ -31,6 +34,11 @@ github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= @@ -47,8 +55,6 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= -github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= -github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= @@ -57,6 +63,10 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA= github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= +github.com/samber/lo v1.53.0 h1:t975lj2py4kJPQ6haz1QMgtId2gtmfktACxIXArw3HM= +github.com/samber/lo v1.53.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -68,6 +78,8 @@ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavM github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= @@ -75,9 +87,11 @@ golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 7eea260..c415e51 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -13,8 +13,10 @@ import ( "net" "net/http" "net/url" - "sync" "time" + + "github.com/alitto/pond/v2" + "github.com/sourcegraph/conc" ) // Checker performs concurrent link checking with configurable options. @@ -90,105 +92,78 @@ func (c *Checker) CheckAll(links []Link) []Result { // The returned channel will be closed when all links have been checked. // Use the context to cancel ongoing checks. func (c *Checker) Check(ctx context.Context, links []Link) <-chan Result { - results := make(chan Result, c.opts.Concurrency) + results := make(chan Result, max(c.opts.Concurrency, 1)) go func() { defer close(results) - // Deduplicate: group links by URL - // Pre-allocate with estimated capacity (assume ~70% unique URLs) - urlToLinks := make(map[string][]Link, len(links)*7/10) - urlOrder := make([]string, 0, len(links)*7/10) // Preserve order for deterministic output - for _, link := range links { - if _, exists := urlToLinks[link.URL]; !exists { - urlOrder = append(urlOrder, link.URL) - } - urlToLinks[link.URL] = append(urlToLinks[link.URL], link) - } - - // Create job queue with unique URLs only (first occurrence of each) - uniqueLinks := make([]Link, 0, len(urlOrder)) - for _, u := range urlOrder { - uniqueLinks = append(uniqueLinks, urlToLinks[u][0]) - } - - // Store primary results for duplicates - primaryResults := make(map[string]*Result, len(urlOrder)) - var resultsMu sync.Mutex - - // Channel for primary results from workers - primaryChan := make(chan Result, c.opts.Concurrency) + urlToLinks, uniqueLinks := deduplicateLinks(links) + primaryChan := make(chan Result, max(c.opts.Concurrency, 1)) + pool := pond.NewPool(max(c.opts.Concurrency, 1), pond.WithContext(ctx)) - // Start worker pool - var wg sync.WaitGroup - jobs := make(chan Link, len(uniqueLinks)) + var lifecycle conc.WaitGroup + lifecycle.Go(func() { + defer close(primaryChan) - for range c.opts.Concurrency { - wg.Go(func() { - for link := range jobs { - select { - case <-ctx.Done(): - primaryChan <- Result{ - Link: link, - Status: StatusError, - Error: "check canceled", - } - default: - result := c.checkWithRetry(ctx, link) - primaryChan <- result - } + for _, link := range uniqueLinks { + if ctx.Err() != nil { + break } - }) - } - // Send jobs to workers - go func() { - sendLoop: - for _, link := range uniqueLinks { - select { - case jobs <- link: - case <-ctx.Done(): - break sendLoop + link := link + if err := pool.Go(func() { + if ctx.Err() != nil { + return + } + primaryChan <- c.checkWithRetry(ctx, link) + }); err != nil { + if ctx.Err() != nil || errors.Is(err, pond.ErrPoolStopped) { + break + } + primaryChan <- Result{ + Link: link, + Status: StatusError, + Error: err.Error(), + } } } - close(jobs) - }() - // Collect primary results and emit all occurrences - go func() { - wg.Wait() - close(primaryChan) - }() + pool.StopAndWait() + }) for result := range primaryChan { - // Store as primary result - resultsMu.Lock() - resultCopy := result - primaryResults[result.Link.URL] = &resultCopy - resultsMu.Unlock() - - // Get all occurrences of this URL occurrences := urlToLinks[result.Link.URL] - - // Emit primary result (first occurrence) results <- result - // Emit duplicate results for additional occurrences + resultCopy := result for i := 1; i < len(occurrences); i++ { - dupResult := Result{ + results <- Result{ Link: occurrences[i], StatusCode: result.StatusCode, Status: StatusDuplicate, DuplicateOf: &resultCopy, } - results <- dupResult } } + + lifecycle.Wait() }() return results } +func deduplicateLinks(links []Link) (map[string][]Link, []Link) { + urlToLinks := make(map[string][]Link, len(links)*7/10) + uniqueLinks := make([]Link, 0, len(links)*7/10) + for _, link := range links { + if _, exists := urlToLinks[link.URL]; !exists { + uniqueLinks = append(uniqueLinks, link) + } + urlToLinks[link.URL] = append(urlToLinks[link.URL], link) + } + return urlToLinks, uniqueLinks +} + // checkWithRetry attempts to check a link with exponential backoff retry. func (c *Checker) checkWithRetry(ctx context.Context, link Link) Result { var lastResult Result diff --git a/internal/checker/main_test.go b/internal/checker/main_test.go new file mode 100644 index 0000000..6c71e44 --- /dev/null +++ b/internal/checker/main_test.go @@ -0,0 +1,11 @@ +package checker + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/internal/parser/parser.go b/internal/parser/parser.go index 947f614..cc817bb 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -9,7 +9,9 @@ import ( "runtime" "sort" "strings" - "sync" + + "github.com/alitto/pond/v2" + "github.com/sourcegraph/conc" ) // LinkType represents the type of link found in a file. @@ -82,6 +84,8 @@ type fileResult struct { err error } +const parallelExtractThreshold = 2 + // ============================================================================= // Shared Helper Functions (used by all parsers) // ============================================================================= @@ -213,7 +217,7 @@ func ExtractLinksFromMultipleFilesWithRegistry(filePaths []string, strict bool) } // For small number of files, use sequential processing - if len(supportedFiles) <= 2 { + if len(supportedFiles) <= parallelExtractThreshold { return extractLinksSequentialWithRegistry(supportedFiles, strict) } @@ -245,42 +249,37 @@ func extractLinksSequentialWithRegistry(filePaths []string, strict bool) ([]Link func extractLinksParallelWithRegistry(filePaths []string, strict bool) ([]Link, error) { numWorkers := min(runtime.NumCPU(), len(filePaths)) - type job struct { - path string - strict bool - } - - jobs := make(chan job, len(filePaths)) - results := make(chan fileResult, len(filePaths)) - - // Start workers - var wg sync.WaitGroup - for range numWorkers { - wg.Go(func() { - for j := range jobs { - links, err := ExtractLinksWithRegistry(j.path, j.strict) - results <- fileResult{links: links, err: err} - } - }) - } + pool := pond.NewResultPool[fileResult](numWorkers) + defer pool.StopAndWait() - // Send jobs + futures := make([]pond.ResultTask[fileResult], 0, len(filePaths)) for _, path := range filePaths { - jobs <- job{path: path, strict: strict} + path := path + futures = append(futures, pool.Submit(func() fileResult { + links, err := ExtractLinksWithRegistry(path, strict) + return fileResult{links: links, err: err} + })) } - close(jobs) - // Wait for workers and close results - go func() { - wg.Wait() - close(results) - }() + results := make(chan fileResult, len(futures)) + var lifecycle conc.WaitGroup + lifecycle.Go(func() { + defer close(results) + for _, future := range futures { + result, err := future.Wait() + if err != nil { + results <- fileResult{err: err} + return + } + results <- result + } + }) - // Collect results allLinks := make([]Link, 0, len(filePaths)*30) for result := range results { if result.err != nil { if strict { + lifecycle.Wait() return nil, result.err } // In non-strict mode, skip files with errors @@ -291,5 +290,7 @@ func extractLinksParallelWithRegistry(filePaths []string, strict bool) ([]Link, } } + lifecycle.Wait() + return allLinks, nil } diff --git a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go index 68e550e..d6aaa71 100644 --- a/internal/scanner/scanner_test.go +++ b/internal/scanner/scanner_test.go @@ -3,6 +3,7 @@ package scanner import ( "os" "path/filepath" + "runtime" "sort" "testing" @@ -142,6 +143,47 @@ func TestFindFiles(t *testing.T) { require.NoError(t, err) assert.Len(t, files, 3) // 2 .md + 1 .txt }) + + t.Run("SymlinkedDirectoryIsNotTraversed", func(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + docsDir := filepath.Join(tmpDir, "docs") + targetDir := t.TempDir() + require.NoError(t, os.MkdirAll(docsDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(docsDir, "root.md"), []byte("# Root"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(targetDir, "nested.md"), []byte("# Nested"), 0o644)) + + linkPath := filepath.Join(docsDir, "linked-dir") + require.NoError(t, os.Symlink(targetDir, linkPath)) + + files, err := FindFiles(tmpDir, []string{".md"}) + require.NoError(t, err) + assert.Len(t, files, 1) + assert.Contains(t, files[0], "root.md") + }) + + t.Run("UnreadableDirectoryReturnsError", func(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("permission test is not portable on Windows") + } + + tmpDir := t.TempDir() + blockedDir := filepath.Join(tmpDir, "blocked") + require.NoError(t, os.MkdirAll(blockedDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "root.md"), []byte("# Root"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(blockedDir, "hidden.md"), []byte("# Hidden"), 0o644)) + require.NoError(t, os.Chmod(blockedDir, 0)) + t.Cleanup(func() { + _ = os.Chmod(blockedDir, 0o755) + }) + + files, err := FindFiles(tmpDir, []string{".md"}) + assert.Error(t, err) + assert.Nil(t, files) + }) } func TestFindFilesByTypes(t *testing.T) { diff --git a/internal/ui/app_test.go b/internal/ui/app_test.go new file mode 100644 index 0000000..b920255 --- /dev/null +++ b/internal/ui/app_test.go @@ -0,0 +1,163 @@ +package ui + +import ( + "testing" + + "github.com/charmbracelet/bubbles/key" + tea "github.com/charmbracelet/bubbletea" + "github.com/leonardomso/gone/internal/checker" + "github.com/leonardomso/gone/internal/filter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNew_Defaults(t *testing.T) { + t.Parallel() + + model := New("", nil, nil, false, nil, nil) + assert.Equal(t, ".", model.path) + assert.Equal(t, []string{"md"}, model.fileTypes) + assert.Equal(t, stateScanning, model.state) + assert.Equal(t, filterAll, model.filter) +} + +func TestHandleFilesFound_TransitionsToExtracting(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + updated, cmd := model.handleFilesFound(FilesFoundMsg{Files: []string{"README.md"}}) + require.NotNil(t, updated) + require.NotNil(t, cmd) + assert.Equal(t, stateExtracting, model.state) + assert.Equal(t, []string{"README.md"}, model.files) +} + +func TestHandleLinksExtracted_AppliesFilterAndStartsChecking(t *testing.T) { + t.Parallel() + + urlFilter, err := filter.New(filter.Config{Domains: []string{"ignored.example"}}) + require.NoError(t, err) + + model := New(".", urlFilter, []string{"md"}, false, nil, nil) + updated, cmd := model.handleLinksExtracted(LinksExtractedMsg{ + Links: []checker.Link{ + {URL: "https://allowed.example", FilePath: "README.md", Line: 1}, + {URL: "https://ignored.example/path", FilePath: "README.md", Line: 2}, + }, + }) + require.NotNil(t, updated) + require.NotNil(t, cmd) + assert.Equal(t, stateChecking, model.state) + assert.Len(t, model.links, 1) + assert.Equal(t, 1, model.uniqueURLs) +} + +func TestHandleLinkChecked_CategorizesResults(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + + _, _ = model.handleLinkChecked(LinkCheckedMsg{Result: checker.Result{Status: checker.StatusAlive}}) + _, _ = model.handleLinkChecked(LinkCheckedMsg{Result: checker.Result{Status: checker.StatusRedirect}}) + _, _ = model.handleLinkChecked(LinkCheckedMsg{Result: checker.Result{Status: checker.StatusDead}}) + _, _ = model.handleLinkChecked(LinkCheckedMsg{Result: checker.Result{Status: checker.StatusDuplicate}}) + + assert.Equal(t, 4, model.checked) + assert.Len(t, model.aliveLinks, 1) + assert.Len(t, model.warningLinks, 1) + assert.Len(t, model.deadLinks, 1) + assert.Len(t, model.duplicateLinks, 1) +} + +func TestHandleAllChecksComplete_UpdatesList(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + model.warningLinks = []checker.Result{{Link: checker.Link{URL: "https://warn.example"}}} + model.deadLinks = []checker.Result{{Link: checker.Link{URL: "https://dead.example"}}} + + updated, cmd := model.handleAllChecksComplete() + require.NotNil(t, updated) + assert.Nil(t, cmd) + assert.Equal(t, stateResults, model.state) + assert.Len(t, model.list.Items(), 2) +} + +func TestUpdate_WindowSizeAndKeys(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + updated, cmd := model.Update(tea.WindowSizeMsg{Width: 100, Height: 30}) + require.NotNil(t, updated) + assert.Nil(t, cmd) + updatedModel, ok := updated.(Model) + require.True(t, ok) + assert.Equal(t, 100, updatedModel.width) + assert.Equal(t, 30, updatedModel.height) + + updated, cmd = updatedModel.Update(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{'?'}}) + require.NotNil(t, updated) + assert.Nil(t, cmd) + updatedModel, ok = updated.(Model) + require.True(t, ok) + assert.True(t, updatedModel.showHelp) +} + +func TestUpdate_FilterCycleAndQuitCancel(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + model.state = stateResults + model.warningLinks = []checker.Result{{Link: checker.Link{URL: "https://warn.example"}}} + model.deadLinks = []checker.Result{{Link: checker.Link{URL: "https://dead.example"}}} + + filterKey := tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{'f'}} + updated, cmd := model.Update(filterKey) + require.NotNil(t, updated) + assert.Nil(t, cmd) + updatedModel, ok := updated.(Model) + require.True(t, ok) + assert.Equal(t, filterWarnings, updatedModel.filter) + + cancelCalled := false + updatedModel.state = stateChecking + updatedModel.checkerState.CancelFunc = func() { cancelCalled = true } + updated, cmd = updatedModel.Update(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{'q'}}) + require.NotNil(t, updated) + require.NotNil(t, cmd) + updatedModel, ok = updated.(Model) + require.True(t, ok) + assert.True(t, cancelCalled) + assert.True(t, updatedModel.quitting) +} + +func TestGetFilteredResultsAndView(t *testing.T) { + t.Parallel() + + model := New(".", nil, []string{"md"}, false, nil, nil) + model.state = stateResults + model.files = []string{"README.md"} + model.links = []checker.Link{{URL: "https://alive.example"}} + model.aliveLinks = []checker.Result{{Status: checker.StatusAlive, Link: checker.Link{URL: "https://alive.example"}}} + model.warningLinks = []checker.Result{{Status: checker.StatusRedirect, Link: checker.Link{URL: "https://warn.example"}}} + model.deadLinks = []checker.Result{{Status: checker.StatusDead, Link: checker.Link{URL: "https://dead.example"}}} + model.duplicateLinks = []checker.Result{{Status: checker.StatusDuplicate, Link: checker.Link{URL: "https://dup.example"}}} + + assert.Len(t, model.getFilteredResults(), 3) + model.filter = filterDead + assert.Len(t, model.getFilteredResults(), 1) + + view := model.View() + assert.Contains(t, view, "Filter:") + assert.Contains(t, view, "✓ 1 alive") + assert.Contains(t, view, "⚠ 1 warnings") +} + +func TestKeyMapHelp(t *testing.T) { + t.Parallel() + + keys := DefaultKeyMap() + assert.Len(t, keys.ShortHelp(), 5) + assert.Len(t, keys.FullHelp(), 3) + assert.True(t, key.Matches(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{'f'}}, keys.Filter)) +} diff --git a/internal/ui/main_test.go b/internal/ui/main_test.go new file mode 100644 index 0000000..240726f --- /dev/null +++ b/internal/ui/main_test.go @@ -0,0 +1,11 @@ +package ui + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/testdata/bench/duplicate-heavy/README.md b/testdata/bench/duplicate-heavy/README.md new file mode 100644 index 0000000..00d16cf --- /dev/null +++ b/testdata/bench/duplicate-heavy/README.md @@ -0,0 +1,12 @@ +# Duplicate Heavy Fixture + +https://repeat.example/resource +https://repeat.example/resource +https://repeat.example/resource +https://repeat.example/resource +https://repeat.example/resource +https://ignored.example/path +https://ignored.example/path +https://ignored.example/path +https://unique.example/one +https://unique.example/two diff --git a/testdata/bench/duplicate-heavy/data.json b/testdata/bench/duplicate-heavy/data.json new file mode 100644 index 0000000..e8a49cb --- /dev/null +++ b/testdata/bench/duplicate-heavy/data.json @@ -0,0 +1,7 @@ +{ + "primary": "https://repeat.example/resource", + "secondary": "https://repeat.example/resource", + "tertiary": "https://repeat.example/resource", + "ignored": "https://ignored.example/path", + "other": "https://unique.example/three" +} diff --git a/testdata/bench/http-scenarios/README.md b/testdata/bench/http-scenarios/README.md new file mode 100644 index 0000000..1cd878b --- /dev/null +++ b/testdata/bench/http-scenarios/README.md @@ -0,0 +1,6 @@ +# HTTP Scenarios + +[alive]({{ALIVE_URL}}) +[redirect]({{REDIRECT_URL}}) +[dead]({{DEAD_URL}}) +[error]({{ERROR_URL}}) diff --git a/testdata/bench/http-scenarios/links.json b/testdata/bench/http-scenarios/links.json new file mode 100644 index 0000000..f56d89e --- /dev/null +++ b/testdata/bench/http-scenarios/links.json @@ -0,0 +1,6 @@ +{ + "alive": "{{ALIVE_URL}}", + "redirect": "{{REDIRECT_URL}}", + "dead": "{{DEAD_URL}}", + "error": "{{ERROR_URL}}" +} diff --git a/testdata/bench/http-scenarios/settings.yaml b/testdata/bench/http-scenarios/settings.yaml new file mode 100644 index 0000000..576bf64 --- /dev/null +++ b/testdata/bench/http-scenarios/settings.yaml @@ -0,0 +1,5 @@ +links: + - "{{ALIVE_URL}}" + - "{{REDIRECT_URL}}" + - "{{DEAD_URL}}" + - "{{ERROR_URL}}" diff --git a/testdata/bench/large/api/endpoints.json b/testdata/bench/large/api/endpoints.json new file mode 100644 index 0000000..813fb80 --- /dev/null +++ b/testdata/bench/large/api/endpoints.json @@ -0,0 +1,6 @@ +{ + "users": "https://api.example.com/users", + "projects": "https://api.example.com/projects", + "search": "https://api.example.com/search", + "health": "https://api.example.com/health" +} diff --git a/testdata/bench/large/api/openapi.json b/testdata/bench/large/api/openapi.json new file mode 100644 index 0000000..0e72698 --- /dev/null +++ b/testdata/bench/large/api/openapi.json @@ -0,0 +1,9 @@ +{ + "servers": [ + { "url": "https://api.example.com" }, + { "url": "https://backup-api.example.com" } + ], + "externalDocs": { + "url": "https://example.com/api/docs" + } +} diff --git a/testdata/bench/large/config/base.yaml b/testdata/bench/large/config/base.yaml new file mode 100644 index 0000000..b20e9dc --- /dev/null +++ b/testdata/bench/large/config/base.yaml @@ -0,0 +1,7 @@ +site: + homepage: https://example.com + status: https://status.example.com + blog: https://example.com/blog +links: + - https://example.com/releases + - https://example.com/changelog diff --git a/testdata/bench/large/config/overrides.yaml b/testdata/bench/large/config/overrides.yaml new file mode 100644 index 0000000..8f5cace --- /dev/null +++ b/testdata/bench/large/config/overrides.yaml @@ -0,0 +1,6 @@ +docs: + guide: https://example.com/guide + migration: https://example.com/migration + security: https://example.com/security +support: + email: https://example.com/support diff --git a/testdata/bench/large/data/regions.toml b/testdata/bench/large/data/regions.toml new file mode 100644 index 0000000..fb8fb7c --- /dev/null +++ b/testdata/bench/large/data/regions.toml @@ -0,0 +1,6 @@ +us = "https://us.example.com" +eu = "https://eu.example.com" +apac = "https://apac.example.com" + +[fallback] +global = "https://example.com/global" diff --git a/testdata/bench/large/data/services.toml b/testdata/bench/large/data/services.toml new file mode 100644 index 0000000..a54dc7a --- /dev/null +++ b/testdata/bench/large/data/services.toml @@ -0,0 +1,8 @@ +[services] +docs = "https://docs.example.com" +status = "https://status.example.com" +cdn = "https://cdn.example.com" + +[integrations] +pagerduty = "https://example.com/integrations/pagerduty" +slack = "https://example.com/integrations/slack" diff --git a/testdata/bench/large/docs/README.md b/testdata/bench/large/docs/README.md new file mode 100644 index 0000000..8f05645 --- /dev/null +++ b/testdata/bench/large/docs/README.md @@ -0,0 +1,5 @@ +# Large Fixture + +https://example.com/start +https://example.com/install +https://example.com/configure diff --git a/testdata/bench/large/docs/architecture.md b/testdata/bench/large/docs/architecture.md new file mode 100644 index 0000000..e3a6574 --- /dev/null +++ b/testdata/bench/large/docs/architecture.md @@ -0,0 +1,6 @@ +# Architecture + +Links: +- https://example.com/architecture/overview +- https://example.com/architecture/runtime +- https://example.com/architecture/storage diff --git a/testdata/bench/large/docs/faq.md b/testdata/bench/large/docs/faq.md new file mode 100644 index 0000000..140d325 --- /dev/null +++ b/testdata/bench/large/docs/faq.md @@ -0,0 +1,6 @@ +# FAQ + +Answers: +https://example.com/faq/install +https://example.com/faq/upgrade +https://example.com/faq/debug diff --git a/testdata/bench/large/feeds/events.xml b/testdata/bench/large/feeds/events.xml new file mode 100644 index 0000000..02d4bd2 --- /dev/null +++ b/testdata/bench/large/feeds/events.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/testdata/bench/large/feeds/releases.xml b/testdata/bench/large/feeds/releases.xml new file mode 100644 index 0000000..5e7a1dd --- /dev/null +++ b/testdata/bench/large/feeds/releases.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/testdata/bench/large/nested/deep/catalog.json b/testdata/bench/large/nested/deep/catalog.json new file mode 100644 index 0000000..59eef6c --- /dev/null +++ b/testdata/bench/large/nested/deep/catalog.json @@ -0,0 +1,7 @@ +{ + "catalog": [ + "https://example.com/deep/catalog/a", + "https://example.com/deep/catalog/b", + "https://example.com/deep/catalog/c" + ] +} diff --git a/testdata/bench/large/nested/deep/guide.md b/testdata/bench/large/nested/deep/guide.md new file mode 100644 index 0000000..bfc13bf --- /dev/null +++ b/testdata/bench/large/nested/deep/guide.md @@ -0,0 +1,5 @@ +# Deep Guide + +https://example.com/deep/one +https://example.com/deep/two +https://example.com/deep/three diff --git a/testdata/bench/medium/config/app.json b/testdata/bench/medium/config/app.json new file mode 100644 index 0000000..f10b4f9 --- /dev/null +++ b/testdata/bench/medium/config/app.json @@ -0,0 +1,6 @@ +{ + "homepage": "https://example.com", + "login": "https://example.com/login", + "logout": "https://example.com/logout", + "dashboard": "https://example.com/dashboard" +} diff --git a/testdata/bench/medium/config/services.yaml b/testdata/bench/medium/config/services.yaml new file mode 100644 index 0000000..d33bd93 --- /dev/null +++ b/testdata/bench/medium/config/services.yaml @@ -0,0 +1,7 @@ +services: + docs: https://docs.example.com + api: https://api.example.com/v1 + metrics: https://metrics.example.com +pages: + - https://example.com/pricing + - https://example.com/contact diff --git a/testdata/bench/medium/docs/guide.md b/testdata/bench/medium/docs/guide.md new file mode 100644 index 0000000..dbd8c79 --- /dev/null +++ b/testdata/bench/medium/docs/guide.md @@ -0,0 +1,5 @@ +# Guide + +See https://example.com/guide/start and https://example.com/guide/install. + +[Troubleshooting](https://example.com/guide/troubleshoot) diff --git a/testdata/bench/medium/docs/tutorial.md b/testdata/bench/medium/docs/tutorial.md new file mode 100644 index 0000000..5f07353 --- /dev/null +++ b/testdata/bench/medium/docs/tutorial.md @@ -0,0 +1,5 @@ +# Tutorial + +Visit https://example.com/tutorial/part-1 and https://example.com/tutorial/part-2. + +Reference: https://example.com/tutorial/reference diff --git a/testdata/bench/medium/feed.xml b/testdata/bench/medium/feed.xml new file mode 100644 index 0000000..f1f5ef8 --- /dev/null +++ b/testdata/bench/medium/feed.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/testdata/bench/medium/integrations.toml b/testdata/bench/medium/integrations.toml new file mode 100644 index 0000000..a5ff50f --- /dev/null +++ b/testdata/bench/medium/integrations.toml @@ -0,0 +1,6 @@ +slack = "https://example.com/integrations/slack" +github = "https://example.com/integrations/github" + +[providers] +docker = "https://example.com/integrations/docker" +kubernetes = "https://example.com/integrations/kubernetes" diff --git a/testdata/bench/medium/nested/catalog.json b/testdata/bench/medium/nested/catalog.json new file mode 100644 index 0000000..38aa617 --- /dev/null +++ b/testdata/bench/medium/nested/catalog.json @@ -0,0 +1,7 @@ +{ + "items": [ + "https://example.com/catalog/1", + "https://example.com/catalog/2", + "https://example.com/catalog/3" + ] +} diff --git a/testdata/bench/medium/nested/reference.md b/testdata/bench/medium/nested/reference.md new file mode 100644 index 0000000..8dbfdc3 --- /dev/null +++ b/testdata/bench/medium/nested/reference.md @@ -0,0 +1,3 @@ +# Reference + +Lookup https://example.com/reference/a, https://example.com/reference/b, and https://example.com/reference/c. diff --git a/testdata/bench/small/README.md b/testdata/bench/small/README.md new file mode 100644 index 0000000..7ecf15f --- /dev/null +++ b/testdata/bench/small/README.md @@ -0,0 +1,6 @@ +# Small Fixture + +Primary docs: https://example.com/docs +Status page: https://status.example.com + +[API Reference](https://example.com/api) diff --git a/testdata/bench/small/config.json b/testdata/bench/small/config.json new file mode 100644 index 0000000..f2320c3 --- /dev/null +++ b/testdata/bench/small/config.json @@ -0,0 +1,5 @@ +{ + "docs": "https://example.com/docs", + "api": "https://example.com/api", + "support": "https://support.example.com" +} diff --git a/testdata/bench/small/feed.xml b/testdata/bench/small/feed.xml new file mode 100644 index 0000000..1ab6a4f --- /dev/null +++ b/testdata/bench/small/feed.xml @@ -0,0 +1,4 @@ + + https://example.com/blog + + diff --git a/testdata/bench/small/links.toml b/testdata/bench/small/links.toml new file mode 100644 index 0000000..ef6ac48 --- /dev/null +++ b/testdata/bench/small/links.toml @@ -0,0 +1,5 @@ +homepage = "https://example.com" +guide = "https://example.com/guide" + +[support] +faq = "https://example.com/faq" diff --git a/testdata/bench/small/settings.yaml b/testdata/bench/small/settings.yaml new file mode 100644 index 0000000..d927d62 --- /dev/null +++ b/testdata/bench/small/settings.yaml @@ -0,0 +1,6 @@ +service: + homepage: https://example.com + healthcheck: https://status.example.com/health +links: + - https://example.com/changelog + - https://example.com/releases From bd70ce2491e0fbe2e2c7799052f6e1ea9f67688f Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Mon, 13 Apr 2026 08:32:48 +0200 Subject: [PATCH 03/13] fix(cmd): align interactive mode and clean lint Route interactive through an injected runner, pass configured checker options into the UI path, and clear the repository lint backlog while keeping tests and integration coverage green. --- cmd/check_output.go | 17 ++-- cmd/check_print.go | 80 +++++++++--------- cmd/check_runner.go | 135 +++++++++++++++++++++---------- cmd/fix_runner.go | 96 ++++++++++++---------- cmd/helpers_test.go | 6 +- cmd/integration_test.go | 2 +- cmd/interactive.go | 60 ++------------ cmd/interactive_runner.go | 88 ++++++++++++++++++++ cmd/runner_test.go | 66 +++++++++++---- cmd/runtime.go | 24 ++++++ cmd/write_helpers.go | 18 +++++ internal/checker/checker_test.go | 42 +++++----- internal/fixer/fixer.go | 36 +++++---- internal/parser/parser.go | 1 - internal/scanner/scanner_test.go | 8 +- internal/stats/stats.go | 40 ++++----- internal/ui/app.go | 11 ++- internal/ui/app_test.go | 10 ++- internal/ui/commands.go | 28 +++++-- internal/ui/item.go | 54 +++++++------ 20 files changed, 528 insertions(+), 294 deletions(-) create mode 100644 cmd/interactive_runner.go create mode 100644 cmd/write_helpers.go diff --git a/cmd/check_output.go b/cmd/check_output.go index 017caee..280fc08 100644 --- a/cmd/check_output.go +++ b/cmd/check_output.go @@ -1,7 +1,6 @@ package cmd import ( - "fmt" "io" "time" @@ -89,14 +88,20 @@ func filterResults(results []checker.Result, opts checkRenderOptions) []checker. // outputText prints results as human-readable text to stdout. // This is the default output mode when no format flag is specified. -func outputText(w io.Writer, results []checker.Result, summary checker.Summary, urlFilter *filter.Filter, opts checkRenderOptions) { +func outputText( + w io.Writer, + results []checker.Result, + summary checker.Summary, + urlFilter *filter.Filter, + opts checkRenderOptions, +) { ignoredCount := getFilterIgnoredCount(urlFilter) printSummaryLine(w, summary, ignoredCount) filtered := filterResults(results, opts) if len(filtered) == 0 { - fmt.Fprintln(w, getEmptyResultsMessage(summary, opts)) + writeln(w, getEmptyResultsMessage(summary, opts)) maybeShowIgnored(w, urlFilter, opts) return } @@ -120,13 +125,13 @@ func getFilterIgnoredCount(urlFilter *filter.Filter) int { // printSummaryLine prints the summary statistics line. func printSummaryLine(w io.Writer, summary checker.Summary, ignoredCount int) { - fmt.Fprintln(w) + writeln(w) if ignoredCount > 0 { - fmt.Fprintf(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates | %d ignored\n\n", + writef(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates | %d ignored\n\n", summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates, ignoredCount) } else { - fmt.Fprintf(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates\n\n", + writef(w, "Summary: %d alive | %d warnings | %d dead | %d duplicates\n\n", summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) } diff --git a/cmd/check_print.go b/cmd/check_print.go index 379e56a..5ac435e 100644 --- a/cmd/check_print.go +++ b/cmd/check_print.go @@ -23,10 +23,10 @@ func printProgressMessage(w io.Writer, total, afterFilter, unique, duplicates, i } if len(parts) > 0 { - fmt.Fprintf(w, "Found %d link(s), checking %d unique URLs (%s)...\n", + writef(w, "Found %d link(s), checking %d unique URLs (%s)...\n", total, unique, strings.Join(parts, ", ")) } else { - fmt.Fprintf(w, "Found %d link(s), checking...\n", afterFilter) + writef(w, "Found %d link(s), checking...\n", afterFilter) } } @@ -36,11 +36,11 @@ func printSection(w io.Writer, title string, results []checker.Result, printer f if len(results) == 0 { return } - fmt.Fprintf(w, "=== %s (%d) ===\n\n", title, len(results)) + writef(w, "=== %s (%d) ===\n\n", title, len(results)) for _, r := range results { printer(w, r) } - fmt.Fprintln(w) + writeln(w) } // printResult dispatches to the appropriate printer based on result status. @@ -59,77 +59,77 @@ func printResult(w io.Writer, r checker.Result) { // printAliveResult formats and prints a result with alive status. func printAliveResult(w io.Writer, r checker.Result) { - fmt.Fprintf(w, " [%d] %s\n", r.StatusCode, r.Link.URL) + writef(w, " [%d] %s\n", r.StatusCode, r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Fprintf(w, " Text: %q\n", text) + writef(w, " Text: %q\n", text) } - fmt.Fprintf(w, " File: %s", r.Link.FilePath) + writef(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Fprintf(w, ":%d", r.Link.Line) + writef(w, ":%d", r.Link.Line) } - fmt.Fprintln(w) - fmt.Fprintln(w) + writeln(w) + writeln(w) } // printWarningResult formats and prints a result with warning status (redirect or blocked). func printWarningResult(w io.Writer, r checker.Result) { - fmt.Fprintf(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) + writef(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Fprintf(w, " Text: %q\n", text) + writef(w, " Text: %q\n", text) } if r.Status == checker.StatusRedirect && len(r.RedirectChain) > 0 { - fmt.Fprintf(w, " Chain: %s\n", formatRedirectChain(r)) - fmt.Fprintf(w, " Final: %s\n", r.FinalURL) + writef(w, " Chain: %s\n", formatRedirectChain(r)) + writef(w, " Final: %s\n", r.FinalURL) } - fmt.Fprintf(w, " File: %s", r.Link.FilePath) + writef(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Fprintf(w, ":%d", r.Link.Line) + writef(w, ":%d", r.Link.Line) } - fmt.Fprintln(w) - fmt.Fprintf(w, " Note: %s\n\n", r.Status.Description()) + writeln(w) + writef(w, " Note: %s\n\n", r.Status.Description()) } // printDeadResult formats and prints a result with dead or error status. func printDeadResult(w io.Writer, r checker.Result) { - fmt.Fprintf(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) + writef(w, " %s %s\n", r.StatusDisplay(), r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Fprintf(w, " Text: %q\n", text) + writef(w, " Text: %q\n", text) } - fmt.Fprintf(w, " File: %s", r.Link.FilePath) + writef(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Fprintf(w, ":%d", r.Link.Line) + writef(w, ":%d", r.Link.Line) } - fmt.Fprintln(w) + writeln(w) if r.Error != "" { - fmt.Fprintf(w, " Error: %s\n", r.Error) + writef(w, " Error: %s\n", r.Error) } - fmt.Fprintln(w) + writeln(w) } // printDuplicateResult formats and prints a result that is a duplicate of another link. func printDuplicateResult(w io.Writer, r checker.Result) { - fmt.Fprintf(w, " [DUPLICATE] %s\n", r.Link.URL) + writef(w, " [DUPLICATE] %s\n", r.Link.URL) if text := helpers.TruncateText(r.Link.Text, 50); text != "" { - fmt.Fprintf(w, " Text: %q\n", text) + writef(w, " Text: %q\n", text) } - fmt.Fprintf(w, " File: %s", r.Link.FilePath) + writef(w, " File: %s", r.Link.FilePath) if r.Link.Line > 0 { - fmt.Fprintf(w, ":%d", r.Link.Line) + writef(w, ":%d", r.Link.Line) } - fmt.Fprintln(w) + writeln(w) if r.DuplicateOf != nil { - fmt.Fprintf(w, " Same as: %s", r.DuplicateOf.Link.FilePath) + writef(w, " Same as: %s", r.DuplicateOf.Link.FilePath) if r.DuplicateOf.Link.Line > 0 { - fmt.Fprintf(w, ":%d", r.DuplicateOf.Link.Line) + writef(w, ":%d", r.DuplicateOf.Link.Line) } - fmt.Fprintf(w, " → Status: %s\n", r.DuplicateOf.Status.Label()) + writef(w, " → Status: %s\n", r.DuplicateOf.Status.Label()) } - fmt.Fprintln(w) + writeln(w) } // printIgnoredURLs displays the list of URLs that were ignored by filter rules. @@ -139,15 +139,15 @@ func printIgnoredURLs(w io.Writer, urlFilter *filter.Filter) { return } - fmt.Fprintf(w, "\n=== Ignored URLs (%d) ===\n\n", len(ignored)) + writef(w, "\n=== Ignored URLs (%d) ===\n\n", len(ignored)) for _, ig := range ignored { - fmt.Fprintf(w, " [IGNORED] %s\n", ig.URL) - fmt.Fprintf(w, " File: %s", ig.File) + writef(w, " [IGNORED] %s\n", ig.URL) + writef(w, " File: %s", ig.File) if ig.Line > 0 { - fmt.Fprintf(w, ":%d", ig.Line) + writef(w, ":%d", ig.Line) } - fmt.Fprintln(w) - fmt.Fprintf(w, " Reason: %s %q\n\n", ig.Type, ig.Rule) + writeln(w) + writef(w, " Reason: %s %q\n\n", ig.Type, ig.Rule) } } diff --git a/cmd/check_runner.go b/cmd/check_runner.go index 58fde6b..a303238 100644 --- a/cmd/check_runner.go +++ b/cmd/check_runner.go @@ -10,7 +10,7 @@ import ( "github.com/leonardomso/gone/internal/stats" ) -type CheckOptions struct { +type checkOptions struct { OutputFormat string OutputFile string Concurrency int @@ -31,12 +31,12 @@ type CheckOptions struct { } type checkRunner struct { - opts CheckOptions + opts checkOptions env CommandEnv io IOStreams } -func newCheckRunner(opts CheckOptions, env CommandEnv, streams IOStreams) *checkRunner { +func newCheckRunner(opts checkOptions, env CommandEnv, streams IOStreams) *checkRunner { return &checkRunner{ opts: opts, env: env, @@ -44,8 +44,8 @@ func newCheckRunner(opts CheckOptions, env CommandEnv, streams IOStreams) *check } } -func currentCheckOptions() CheckOptions { - return CheckOptions{ +func currentCheckOptions() checkOptions { + return checkOptions{ OutputFormat: outputFormat, OutputFile: outputFile, Concurrency: concurrency, @@ -79,12 +79,12 @@ func (r *checkRunner) renderOptions() checkRenderOptions { func (r *checkRunner) Run(args []string) int { perf := r.env.NewStats() if err := r.validateFlags(); err != nil { - return r.fail(1, "Invalid flags", err) + return r.fail("Invalid flags", err) } loadedCfg, err := r.env.LoadConfig(r.opts.NoConfig) if err != nil { - return r.fail(1, "Config error", err) + return r.fail("Config error", err) } path := getPathArg(args) @@ -93,12 +93,17 @@ func (r *checkRunner) Run(args []string) int { files, err := r.scanFilesWithConfig(path, loadedCfg, perf, useStructuredOutput) if err != nil { - return r.fail(1, "Error scanning directory", err) + return r.fail("Error scanning directory", err) } - links, urlFilter, done, err := r.parseAndFilterLinksWithConfig(files, loadedCfg, perf, useStructuredOutput) + links, urlFilter, done, err := r.parseAndFilterLinksWithConfig( + files, + loadedCfg, + perf, + useStructuredOutput, + ) if err != nil { - return r.fail(1, err.Error(), nil) + return r.fail(err.Error(), nil) } if done { return 0 @@ -110,7 +115,7 @@ func (r *checkRunner) Run(args []string) int { files, results, summary, urlFilter, perf, useStructuredOutput, effectiveFormat, loadedCfg.GetShowStats(r.opts.ShowStats), ); err != nil { - return r.fail(1, err.Error(), nil) + return r.fail(err.Error(), nil) } if summary.HasDeadLinks() { @@ -119,20 +124,20 @@ func (r *checkRunner) Run(args []string) int { return 0 } -func (r *checkRunner) fail(code int, message string, err error) int { +func (r *checkRunner) fail(message string, err error) int { if err != nil { if message != "" { - fmt.Fprintf(r.io.ErrOut, "%s: %v\n", message, err) + writef(r.io.ErrOut, "%s: %v\n", message, err) } else { - fmt.Fprintf(r.io.ErrOut, "%v\n", err) + writef(r.io.ErrOut, "%v\n", err) } - return code + return 1 } if message != "" { - fmt.Fprintln(r.io.ErrOut, message) + writeln(r.io.ErrOut, message) } - return code + return 1 } func (r *checkRunner) validateFlags() error { @@ -156,7 +161,7 @@ func (r *checkRunner) scanFilesWithConfig( effectiveTypes := cfg.GetTypes(r.opts.FileTypes, []string{"md"}) if err := validateFileTypes(effectiveTypes); err != nil { - return nil, fmt.Errorf("Invalid file types: %w", err) + return nil, fmt.Errorf("invalid file types: %w", err) } scanOpts := cfg.BuildScanOptions(path, r.opts.FileTypes, []string{"md"}) @@ -168,7 +173,7 @@ func (r *checkRunner) scanFilesWithConfig( perf.EndScan(len(files)) if !useStructuredOutput { - fmt.Fprintf(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) + writef(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) } return files, nil @@ -181,20 +186,31 @@ func (r *checkRunner) parseAndFilterLinksWithConfig( parserLinks, err := r.env.ExtractLinks(files, cfg.GetStrict(r.opts.StrictMode)) if err != nil { - return nil, nil, false, fmt.Errorf("Error parsing files: %w", err) + return nil, nil, false, fmt.Errorf("error parsing files: %w", err) } if len(parserLinks) == 0 { perf.EndParse(0, 0, 0, 0) - if err := r.handleEmptyLinksWithStats(files, useStructuredOutput, perf, cfg.GetOutputFormat(r.opts.OutputFormat), cfg.GetShowStats(r.opts.ShowStats)); err != nil { - return nil, nil, false, fmt.Errorf("Error formatting output: %w", err) + if err := r.handleEmptyLinksWithStats( + files, + useStructuredOutput, + perf, + cfg.GetOutputFormat(r.opts.OutputFormat), + cfg.GetShowStats(r.opts.ShowStats), + ); err != nil { + return nil, nil, false, fmt.Errorf("error formatting output: %w", err) } return nil, nil, true, nil } - urlFilter, err := r.env.CreateFilterWithConfig(cfg.Config(), r.opts.IgnoreDomains, r.opts.IgnorePatterns, r.opts.IgnoreRegex) + urlFilter, err := r.env.CreateFilterWithConfig( + cfg.Config(), + r.opts.IgnoreDomains, + r.opts.IgnorePatterns, + r.opts.IgnoreRegex, + ) if err != nil { - return nil, nil, false, fmt.Errorf("Error creating filter: %w", err) + return nil, nil, false, fmt.Errorf("error creating filter: %w", err) } links := FilterParserLinks(parserLinks, urlFilter) @@ -208,8 +224,15 @@ func (r *checkRunner) parseAndFilterLinksWithConfig( } if len(links) == 0 { - if err := r.handleAllFilteredWithStats(files, useStructuredOutput, urlFilter, perf, cfg.GetOutputFormat(r.opts.OutputFormat), cfg.GetShowStats(r.opts.ShowStats)); err != nil { - return nil, nil, false, fmt.Errorf("Error formatting output: %w", err) + if err := r.handleAllFilteredWithStats( + files, + useStructuredOutput, + urlFilter, + perf, + cfg.GetOutputFormat(r.opts.OutputFormat), + cfg.GetShowStats(r.opts.ShowStats), + ); err != nil { + return nil, nil, false, fmt.Errorf("error formatting output: %w", err) } return nil, urlFilter, true, nil } @@ -235,30 +258,50 @@ func (r *checkRunner) routeOutputWithConfig( ) error { switch { case useStructuredOutput: - return r.handleStructuredOutputWithStats(files, results, summary, urlFilter, perf, effectiveFormat, effectiveShowStats) + return r.handleStructuredOutputWithStats( + files, + results, + summary, + urlFilter, + perf, + effectiveFormat, + effectiveShowStats, + ) case r.opts.OutputFile != "": return r.handleFileOutputWithStats(files, results, summary, urlFilter, perf, effectiveShowStats) default: outputText(r.io.Out, results, summary, urlFilter, r.renderOptions()) if effectiveShowStats { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return nil } } func (r *checkRunner) handleEmptyLinksWithStats( - files []string, useStructuredOutput bool, perf *stats.Stats, effectiveFormat string, effectiveShowStats bool, + files []string, + useStructuredOutput bool, + perf *stats.Stats, + effectiveFormat string, + effectiveShowStats bool, ) error { switch { case useStructuredOutput: - return r.handleStructuredOutputWithStats(files, nil, checker.Summary{}, nil, perf, effectiveFormat, effectiveShowStats) + return r.handleStructuredOutputWithStats( + files, + nil, + checker.Summary{}, + nil, + perf, + effectiveFormat, + effectiveShowStats, + ) case r.opts.OutputFile != "": return r.handleFileOutputWithStats(files, nil, checker.Summary{}, nil, perf, effectiveShowStats) default: - fmt.Fprintln(r.io.Out, "No links found.") + writeln(r.io.Out, "No links found.") if effectiveShowStats { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return nil } @@ -270,16 +313,24 @@ func (r *checkRunner) handleAllFilteredWithStats( ) error { switch { case useStructuredOutput: - return r.handleStructuredOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf, effectiveFormat, effectiveShowStats) + return r.handleStructuredOutputWithStats( + files, + nil, + checker.Summary{}, + urlFilter, + perf, + effectiveFormat, + effectiveShowStats, + ) case r.opts.OutputFile != "": return r.handleFileOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf, effectiveShowStats) default: - fmt.Fprintln(r.io.Out, "\nAll links were ignored by filter rules.") + writeln(r.io.Out, "\nAll links were ignored by filter rules.") if r.opts.ShowIgnored && urlFilter != nil { printIgnoredURLs(r.io.Out, urlFilter) } if effectiveShowStats { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return nil } @@ -296,7 +347,7 @@ func (r *checkRunner) handleStructuredOutputWithStats( data, err := r.env.FormatReport(report, output.Format(effectiveFormat)) if err != nil { - return fmt.Errorf("Error formatting output: %w", err) + return fmt.Errorf("error formatting output: %w", err) } _, err = r.io.Out.Write(data) @@ -313,19 +364,19 @@ func (r *checkRunner) handleFileOutputWithStats( } if err := r.env.WriteToFile(report, r.opts.OutputFile); err != nil { - return fmt.Errorf("Error writing file: %w", err) + return fmt.Errorf("error writing file: %w", err) } - fmt.Fprintf(r.io.Out, "Wrote report to %s\n", r.opts.OutputFile) - fmt.Fprintf(r.io.Out, "\nSummary: %d alive | %d warnings | %d dead | %d duplicates", + writef(r.io.Out, "Wrote report to %s\n", r.opts.OutputFile) + writef(r.io.Out, "\nSummary: %d alive | %d warnings | %d dead | %d duplicates", summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) if urlFilter != nil && urlFilter.IgnoredCount() > 0 { - fmt.Fprintf(r.io.Out, " | %d ignored", urlFilter.IgnoredCount()) + writef(r.io.Out, " | %d ignored", urlFilter.IgnoredCount()) } - fmt.Fprintln(r.io.Out) + writeln(r.io.Out) if effectiveShowStats { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return nil diff --git a/cmd/fix_runner.go b/cmd/fix_runner.go index dedc111..0ddfcc9 100644 --- a/cmd/fix_runner.go +++ b/cmd/fix_runner.go @@ -2,7 +2,6 @@ package cmd import ( "bufio" - "fmt" "io" "strings" @@ -10,7 +9,7 @@ import ( "github.com/leonardomso/gone/internal/fixer" ) -type FixOptions struct { +type fixOptions struct { Yes bool DryRun bool Concurrency int @@ -26,12 +25,12 @@ type FixOptions struct { } type fixRunner struct { - opts FixOptions + opts fixOptions env CommandEnv io IOStreams } -func newFixRunner(opts FixOptions, env CommandEnv, streams IOStreams) *fixRunner { +func newFixRunner(opts fixOptions, env CommandEnv, streams IOStreams) *fixRunner { return &fixRunner{ opts: opts, env: env, @@ -39,8 +38,8 @@ func newFixRunner(opts FixOptions, env CommandEnv, streams IOStreams) *fixRunner } } -func currentFixOptions() FixOptions { - return FixOptions{ +func currentFixOptions() fixOptions { + return fixOptions{ Yes: fixYes, DryRun: fixDryRun, Concurrency: fixConcurrency, @@ -61,46 +60,51 @@ func (r *fixRunner) Run(args []string) int { loadedCfg, err := r.env.LoadConfig(r.opts.NoConfig) if err != nil { - fmt.Fprintf(r.io.ErrOut, "Config error: %v\n", err) + writef(r.io.ErrOut, "Config error: %v\n", err) return 1 } path := getPathArg(args) effectiveTypes := loadedCfg.GetTypes(r.opts.FileTypes, []string{"md"}) if err := validateFileTypes(effectiveTypes); err != nil { - fmt.Fprintf(r.io.ErrOut, "Error: %v\n", err) + writef(r.io.ErrOut, "Error: %v\n", err) return 1 } perf.StartScan() files, err := r.env.FindFiles(loadedCfg.BuildScanOptions(path, r.opts.FileTypes, []string{"md"})) if err != nil { - fmt.Fprintf(r.io.ErrOut, "Error scanning directory: %v\n", err) + writef(r.io.ErrOut, "Error scanning directory: %v\n", err) return 1 } perf.EndScan(len(files)) - fmt.Fprintf(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) + writef(r.io.Out, "Found %d file(s) of type(s): %s\n", len(files), strings.Join(effectiveTypes, ", ")) perf.StartParse() parserLinks, err := r.env.ExtractLinks(files, loadedCfg.GetStrict(r.opts.StrictMode)) if err != nil { - fmt.Fprintf(r.io.ErrOut, "Error parsing files: %v\n", err) + writef(r.io.ErrOut, "Error parsing files: %v\n", err) return 1 } if len(parserLinks) == 0 { perf.EndParse(0, 0, 0, 0) - fmt.Fprintln(r.io.Out, "No links found.") + writeln(r.io.Out, "No links found.") if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return 0 } - urlFilter, err := r.env.CreateFilterWithConfig(loadedCfg.Config(), r.opts.IgnoreDomains, r.opts.IgnorePatterns, r.opts.IgnoreRegex) + urlFilter, err := r.env.CreateFilterWithConfig( + loadedCfg.Config(), + r.opts.IgnoreDomains, + r.opts.IgnorePatterns, + r.opts.IgnoreRegex, + ) if err != nil { - fmt.Fprintf(r.io.ErrOut, "Error creating filter: %v\n", err) + writef(r.io.ErrOut, "Error creating filter: %v\n", err) return 1 } @@ -111,17 +115,19 @@ func (r *fixRunner) Run(args []string) int { perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) if len(links) == 0 { - fmt.Fprintln(r.io.Out, "All links were ignored by filter rules.") + writeln(r.io.Out, "All links were ignored by filter rules.") if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return 0 } - fmt.Fprintf(r.io.Out, "Checking %d unique URL(s) for redirects...\n", uniqueURLs) + writef(r.io.Out, "Checking %d unique URL(s) for redirects...\n", uniqueURLs) perf.StartCheck() - results := r.env.NewChecker(loadedCfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries)).CheckAll(links) + results := r.env.NewChecker( + loadedCfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries), + ).CheckAll(links) perf.EndCheck() f := r.env.NewFixer() @@ -129,21 +135,21 @@ func (r *fixRunner) Run(args []string) int { changes := f.FindFixes(results) if len(changes) == 0 { - fmt.Fprintln(r.io.Out, "\nNo fixable redirects found.") + writeln(r.io.Out, "\nNo fixable redirects found.") printFixSummary(r.io.Out, results) if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return 0 } - fmt.Fprintln(r.io.Out) - fmt.Fprint(r.io.Out, f.Preview(changes)) + writeln(r.io.Out) + writeString(r.io.Out, f.Preview(changes)) if r.opts.DryRun { - fmt.Fprintln(r.io.Out, "Dry-run mode: no files were modified.") + writeln(r.io.Out, "Dry-run mode: no files were modified.") if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return 0 } @@ -151,21 +157,21 @@ func (r *fixRunner) Run(args []string) int { if r.opts.Yes { applyAllFixes(r.io.Out, f, changes) if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return 0 } code := runInteractiveFix(r.io, f, changes) if loadedCfg.GetShowStats(r.opts.ShowStats) { - fmt.Fprint(r.io.Out, perf.String()) + writeString(r.io.Out, perf.String()) } return code } func applyAllFixes(out io.Writer, f RedirectFixer, changes []fixer.FileChanges) { results := f.ApplyAll(changes) - fmt.Fprintln(out, fixer.DetailedSummary(results)) + writeln(out, fixer.DetailedSummary(results)) } func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileChanges) int { @@ -182,11 +188,11 @@ func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileC continue } - fmt.Fprintf(streams.Out, "\nFix %s? (%d change(s)) [y/n/a/q/?] ", fc.FilePath, fc.TotalFixes) + writef(streams.Out, "\nFix %s? (%d change(s)) [y/n/a/q/?] ", fc.FilePath, fc.TotalFixes) input, err := reader.ReadString('\n') if err != nil { - fmt.Fprintf(streams.ErrOut, "\nError reading input: %v\n", err) + writef(streams.ErrOut, "\nError reading input: %v\n", err) return 1 } @@ -194,13 +200,13 @@ func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileC case "y", "yes": result, applyErr := f.ApplyToFile(fc) if applyErr != nil { - fmt.Fprintf(streams.ErrOut, "Error: %v\n", applyErr) + writef(streams.ErrOut, "Error: %v\n", applyErr) } else { - fmt.Fprintf(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) + writef(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) } allResults = append(allResults, *result) case "n", "no": - fmt.Fprintf(streams.Out, "Skipped %s\n", fc.FilePath) + writef(streams.Out, "Skipped %s\n", fc.FilePath) allResults = append(allResults, fixer.FixResult{ FilePath: fc.FilePath, Skipped: fc.TotalFixes, @@ -208,14 +214,14 @@ func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileC case "a", "all": result, applyErr := f.ApplyToFile(fc) if applyErr != nil { - fmt.Fprintf(streams.ErrOut, "Error: %v\n", applyErr) + writef(streams.ErrOut, "Error: %v\n", applyErr) } else { - fmt.Fprintf(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) + writef(streams.Out, "Fixed %d redirect(s) in %s\n", result.Applied, fc.FilePath) } allResults = append(allResults, *result) applyAll = true case "q", "quit": - fmt.Fprintln(streams.Out, "\nQuitting. Remaining files were not modified.") + writeln(streams.Out, "\nQuitting. Remaining files were not modified.") for j := i; j < len(changes); j++ { allResults = append(allResults, fixer.FixResult{ FilePath: changes[j].FilePath, @@ -228,18 +234,18 @@ func runInteractiveFix(streams IOStreams, f RedirectFixer, changes []fixer.FileC printInteractiveHelp(streams.Out) i-- default: - fmt.Fprintln(streams.Out, "Invalid input. Use y/n/a/q/? (or type 'help')") + writeln(streams.Out, "Invalid input. Use y/n/a/q/? (or type 'help')") i-- } } - fmt.Fprintln(streams.Out) + writeln(streams.Out) printInteractiveResults(streams.Out, allResults) return 0 } func printInteractiveHelp(out io.Writer) { - fmt.Fprintln(out, ` + writeln(out, ` Interactive mode options: y, yes - Fix this file n, no - Skip this file @@ -264,16 +270,16 @@ func printInteractiveResults(out io.Writer, results []fixer.FixResult) { } if applied > 0 { - fmt.Fprintf(out, "Fixed %d redirect(s) across %d file(s).\n", applied, filesModified) + writef(out, "Fixed %d redirect(s) across %d file(s).\n", applied, filesModified) } if filesSkipped > 0 { - fmt.Fprintf(out, "Skipped %d file(s).\n", filesSkipped) + writef(out, "Skipped %d file(s).\n", filesSkipped) } } func printFixSummary(out io.Writer, results []checker.Result) { summary := checker.Summarize(results) - fmt.Fprintf(out, "\nLink status: %d alive | %d redirects | %d dead | %d errors\n", + writef(out, "\nLink status: %d alive | %d redirects | %d dead | %d errors\n", summary.Alive, summary.Redirects, summary.Dead, summary.Errors) if summary.Redirects == 0 { @@ -287,6 +293,10 @@ func printFixSummary(out io.Writer, results []checker.Result) { } } if notFixable > 0 { - fmt.Fprintf(out, "Note: %d redirect(s) lead to non-200 responses and cannot be auto-fixed.\n", notFixable) + writef( + out, + "Note: %d redirect(s) lead to non-200 responses and cannot be auto-fixed.\n", + notFixable, + ) } } diff --git a/cmd/helpers_test.go b/cmd/helpers_test.go index 7e598f0..9b74d36 100644 --- a/cmd/helpers_test.go +++ b/cmd/helpers_test.go @@ -107,7 +107,11 @@ func TestLoadedConfig_GettersAndBuilders(t *testing.T) { assert.Equal(t, []string{"md"}, loaded.GetTypes([]string{"md"}, []string{"json"})) assert.Equal(t, []string{"yaml"}, loaded.GetTypes([]string{"yaml"}, []string{"md"})) - opts := loaded.BuildCheckerOptions(checker.DefaultConcurrency, int(checker.DefaultTimeout.Seconds()), checker.DefaultMaxRetries) + opts := loaded.BuildCheckerOptions( + checker.DefaultConcurrency, + int(checker.DefaultTimeout.Seconds()), + checker.DefaultMaxRetries, + ) assert.Equal(t, 12, opts.Concurrency) assert.Equal(t, 7*time.Second, opts.Timeout) assert.Equal(t, 4, opts.MaxRetries) diff --git a/cmd/integration_test.go b/cmd/integration_test.go index 5bf5c34..0739e00 100644 --- a/cmd/integration_test.go +++ b/cmd/integration_test.go @@ -128,7 +128,7 @@ func TestCheck_StrictModeParseFailure(t *testing.T) { result := runGone(t, tmpDir, "check", ".", "--types=json", "--strict", "--no-config") require.Equal(t, 1, result.exitCode) - assert.Contains(t, result.stderr, "Error parsing files") + assert.Contains(t, result.stderr, "error parsing files") assert.Contains(t, result.stderr, "broken.json") } diff --git a/cmd/interactive.go b/cmd/interactive.go index 0f94b62..6bde1c3 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -1,14 +1,8 @@ package cmd import ( - "fmt" "os" - "strings" - "github.com/leonardomso/gone/internal/parser" - "github.com/leonardomso/gone/internal/ui" - - tea "github.com/charmbracelet/bubbletea" "github.com/spf13/cobra" ) @@ -74,52 +68,12 @@ func init() { // runInteractive launches the interactive TUI for link checking. func runInteractive(_ *cobra.Command, args []string) { - // Load configuration - loadedCfg, err := LoadConfig(iNoConfig) - if err != nil { - fmt.Fprintf(os.Stderr, "Config error: %v\n", err) - os.Exit(1) //nolint:revive // deep-exit is acceptable for CLI entry points - } - - path := "." - if len(args) > 0 { - path = args[0] - } - - // Get effective file types from config - effectiveTypes := loadedCfg.GetTypes(iFileTypes, []string{"md"}) - - // Validate file types - supportedTypes := parser.SupportedFileTypes() - supported := make(map[string]bool, len(supportedTypes)) - for _, t := range supportedTypes { - supported[t] = true - } - for _, t := range effectiveTypes { - if !supported[strings.ToLower(t)] { - fmt.Fprintf(os.Stderr, "Error: unsupported file type: %s (supported: %s)\n", - t, strings.Join(supportedTypes, ", ")) - os.Exit(1) //nolint:revive // deep-exit is acceptable for CLI entry points - } - } - - // Get effective strict mode - effectiveStrict := loadedCfg.GetStrict(iStrictMode) - - // Create filter from config and flags using shared helper - urlFilter, err := CreateFilterWithConfig(loadedCfg.Config(), iIgnoreDomains, iIgnorePatterns, iIgnoreRegex) - if err != nil { - fmt.Fprintf(os.Stderr, "Error creating filter: %v\n", err) - os.Exit(1) //nolint:revive // deep-exit is acceptable for CLI entry points - } - - // Get scan options for include/exclude patterns - scanInclude, scanExclude := loadedCfg.GetScanOptions() - - model := ui.New(path, urlFilter, effectiveTypes, effectiveStrict, scanInclude, scanExclude) - p := tea.NewProgram(model, tea.WithAltScreen()) - if _, err := p.Run(); err != nil { - fmt.Printf("Error running interactive mode: %v\n", err) - os.Exit(1) //nolint:revive // deep-exit is acceptable for CLI entry points + code := newInteractiveRunner( + currentInteractiveOptions(), + defaultCommandEnv(), + defaultIOStreams(), + ).Run(args) + if code != 0 { + os.Exit(code) } } diff --git a/cmd/interactive_runner.go b/cmd/interactive_runner.go new file mode 100644 index 0000000..f73c09a --- /dev/null +++ b/cmd/interactive_runner.go @@ -0,0 +1,88 @@ +package cmd + +import "github.com/leonardomso/gone/internal/checker" + +type interactiveOptions struct { + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + NoConfig bool +} + +type interactiveRunner struct { + opts interactiveOptions + env CommandEnv + io IOStreams +} + +func newInteractiveRunner( + opts interactiveOptions, + env CommandEnv, + streams IOStreams, +) *interactiveRunner { + return &interactiveRunner{ + opts: opts, + env: env, + io: streams, + } +} + +func currentInteractiveOptions() interactiveOptions { + return interactiveOptions{ + FileTypes: append([]string{}, iFileTypes...), + StrictMode: iStrictMode, + IgnoreDomains: append([]string{}, iIgnoreDomains...), + IgnorePatterns: append([]string{}, iIgnorePatterns...), + IgnoreRegex: append([]string{}, iIgnoreRegex...), + NoConfig: iNoConfig, + } +} + +func (r *interactiveRunner) Run(args []string) int { + loadedCfg, err := r.env.LoadConfig(r.opts.NoConfig) + if err != nil { + writef(r.io.ErrOut, "Config error: %v\n", err) + return 1 + } + + effectiveTypes := loadedCfg.GetTypes(r.opts.FileTypes, []string{"md"}) + if err := validateFileTypes(effectiveTypes); err != nil { + writef(r.io.ErrOut, "Error: %v\n", err) + return 1 + } + + urlFilter, err := r.env.CreateFilterWithConfig( + loadedCfg.Config(), + r.opts.IgnoreDomains, + r.opts.IgnorePatterns, + r.opts.IgnoreRegex, + ) + if err != nil { + writef(r.io.ErrOut, "Error creating filter: %v\n", err) + return 1 + } + + scanInclude, scanExclude := loadedCfg.GetScanOptions() + model := r.env.NewInteractiveModel( + getPathArg(args), + urlFilter, + effectiveTypes, + loadedCfg.GetStrict(r.opts.StrictMode), + scanInclude, + scanExclude, + loadedCfg.BuildCheckerOptions( + checker.DefaultConcurrency, + int(checker.DefaultTimeout.Seconds()), + checker.DefaultMaxRetries, + ), + ) + + if err := r.env.RunInteractiveProgram(model, r.io); err != nil { + writef(r.io.ErrOut, "Error running interactive mode: %v\n", err) + return 1 + } + + return 0 +} diff --git a/cmd/runner_test.go b/cmd/runner_test.go index 159dc80..861d11a 100644 --- a/cmd/runner_test.go +++ b/cmd/runner_test.go @@ -91,7 +91,7 @@ func TestCheckRunner_Run_NoLinksStructuredOutput(t *testing.T) { }}, nil } - runner := newCheckRunner(CheckOptions{FileTypes: []string{"md"}}, env, IOStreams{ + runner := newCheckRunner(checkOptions{FileTypes: []string{"md"}}, env, IOStreams{ Out: &stdout, ErrOut: &stderr, }) @@ -112,7 +112,7 @@ func TestCheckRunner_Run_AllLinksIgnoredTextOutput(t *testing.T) { return []parser.Link{{URL: "https://ignored.example/path", FilePath: "README.md", Line: 3}}, nil } - runner := newCheckRunner(CheckOptions{ + runner := newCheckRunner(checkOptions{ FileTypes: []string{"md"}, ShowIgnored: true, IgnoreDomains: []string{"ignored.example"}, @@ -147,7 +147,7 @@ func TestCheckRunner_Run_FileOutputSummary(t *testing.T) { } outputPath := filepath.Join(t.TempDir(), "report.json") - runner := newCheckRunner(CheckOptions{ + runner := newCheckRunner(checkOptions{ FileTypes: []string{"md"}, OutputFile: outputPath, }, env, IOStreams{ @@ -180,7 +180,7 @@ func TestCheckRunner_Run_DeadLinksReturnExitCodeOne(t *testing.T) { } } - runner := newCheckRunner(CheckOptions{ + runner := newCheckRunner(checkOptions{ FileTypes: []string{"md"}, }, env, IOStreams{ Out: &stdout, @@ -207,9 +207,21 @@ func TestOutputTextAndPrintHelpers(t *testing.T) { } results := []checker.Result{ primary, - {Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, Status: checker.StatusDead, StatusCode: 404}, - {Link: checker.Link{URL: "https://duplicate.example", FilePath: "README.md", Line: 3}, Status: checker.StatusDuplicate, DuplicateOf: &primary}, - {Link: checker.Link{URL: "https://alive.example", FilePath: "README.md", Line: 4}, Status: checker.StatusAlive, StatusCode: 200}, + { + Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, + Status: checker.StatusDead, + StatusCode: 404, + }, + { + Link: checker.Link{URL: "https://duplicate.example", FilePath: "README.md", Line: 3}, + Status: checker.StatusDuplicate, + DuplicateOf: &primary, + }, + { + Link: checker.Link{URL: "https://alive.example", FilePath: "README.md", Line: 4}, + Status: checker.StatusAlive, + StatusCode: 200, + }, } urlFilter, err := filter.New(filter.Config{Domains: []string{"ignored.example"}}) @@ -234,8 +246,18 @@ func TestOutputText_GroupedVsFlat(t *testing.T) { t.Parallel() results := []checker.Result{ - {Link: checker.Link{URL: "https://redirect.example", FilePath: "README.md", Line: 1}, Status: checker.StatusRedirect, StatusCode: 301, FinalStatus: 200, FinalURL: "https://final.example"}, - {Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, Status: checker.StatusDead, StatusCode: 404}, + { + Link: checker.Link{URL: "https://redirect.example", FilePath: "README.md", Line: 1}, + Status: checker.StatusRedirect, + StatusCode: 301, + FinalStatus: 200, + FinalURL: "https://final.example", + }, + { + Link: checker.Link{URL: "https://dead.example", FilePath: "README.md", Line: 2}, + Status: checker.StatusDead, + StatusCode: 404, + }, } summary := checker.Summarize(results) @@ -254,9 +276,21 @@ func TestOutputText_GroupedVsFlat(t *testing.T) { func TestGetEmptyResultsMessage_Variants(t *testing.T) { t.Parallel() - assert.Equal(t, "No alive links found.", getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowAlive: true})) - assert.Equal(t, "No warnings found.", getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowWarnings: true})) - assert.Equal(t, "No dead links found.", getEmptyResultsMessage(checker.Summary{Redirects: 1}, checkRenderOptions{ShowDead: true})) + assert.Equal( + t, + "No alive links found.", + getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowAlive: true}), + ) + assert.Equal( + t, + "No warnings found.", + getEmptyResultsMessage(checker.Summary{}, checkRenderOptions{ShowWarnings: true}), + ) + assert.Equal( + t, + "No dead links found.", + getEmptyResultsMessage(checker.Summary{Redirects: 1}, checkRenderOptions{ShowDead: true}), + ) assert.Equal(t, "All links are alive!", getEmptyResultsMessage(checker.Summary{Alive: 3}, checkRenderOptions{})) } @@ -282,7 +316,11 @@ func TestRunInteractiveFix_Branches(t *testing.T) { input: "oops\ny\n", changes: []fixer.FileChanges{{FilePath: "README.md", TotalFixes: 1}}, expectedCode: 0, - expectOut: []string{"Invalid input.", "Fixed 1 redirect(s) in README.md", "Fixed 1 redirect(s) across 1 file(s)."}, + expectOut: []string{ + "Invalid input.", + "Fixed 1 redirect(s) in README.md", + "Fixed 1 redirect(s) across 1 file(s).", + }, }, { name: "quit", @@ -357,7 +395,7 @@ func TestFixRunner_Run_DryRun(t *testing.T) { } } - runner := newFixRunner(FixOptions{ + runner := newFixRunner(fixOptions{ FileTypes: []string{"md"}, DryRun: true, }, env, IOStreams{ diff --git a/cmd/runtime.go b/cmd/runtime.go index cc59442..dfcc17f 100644 --- a/cmd/runtime.go +++ b/cmd/runtime.go @@ -5,6 +5,7 @@ import ( "os" "time" + tea "github.com/charmbracelet/bubbletea" "github.com/leonardomso/gone/internal/checker" "github.com/leonardomso/gone/internal/config" "github.com/leonardomso/gone/internal/filter" @@ -13,6 +14,7 @@ import ( "github.com/leonardomso/gone/internal/parser" "github.com/leonardomso/gone/internal/scanner" "github.com/leonardomso/gone/internal/stats" + "github.com/leonardomso/gone/internal/ui" ) // IOStreams groups the command IO streams to make runners testable. @@ -52,6 +54,8 @@ type CommandEnv struct { CreateFilterWithConfig func(*config.Config, []string, []string, []string) (*filter.Filter, error) NewChecker func(checker.Options) LinkChecker NewFixer func() RedirectFixer + NewInteractiveModel func(string, *filter.Filter, []string, bool, []string, []string, checker.Options) tea.Model + RunInteractiveProgram func(tea.Model, IOStreams) error FormatReport func(*output.Report, output.Format) ([]byte, error) WriteToFile func(*output.Report, string) error NewStats func() *stats.Stats @@ -70,6 +74,26 @@ func defaultCommandEnv() CommandEnv { NewFixer: func() RedirectFixer { return fixer.New() }, + NewInteractiveModel: func( + path string, + urlFilter *filter.Filter, + fileTypes []string, + strictMode bool, + scanInclude, scanExclude []string, + checkerOpts checker.Options, + ) tea.Model { + return ui.New(path, urlFilter, fileTypes, strictMode, scanInclude, scanExclude, checkerOpts) + }, + RunInteractiveProgram: func(model tea.Model, streams IOStreams) error { + program := tea.NewProgram( + model, + tea.WithAltScreen(), + tea.WithInput(streams.In), + tea.WithOutput(streams.Out), + ) + _, err := program.Run() + return err + }, FormatReport: output.FormatReport, WriteToFile: output.WriteToFile, NewStats: stats.New, diff --git a/cmd/write_helpers.go b/cmd/write_helpers.go new file mode 100644 index 0000000..0302da6 --- /dev/null +++ b/cmd/write_helpers.go @@ -0,0 +1,18 @@ +package cmd + +import ( + "fmt" + "io" +) + +func writef(w io.Writer, format string, args ...any) { + _, _ = fmt.Fprintf(w, format, args...) +} + +func writeln(w io.Writer, args ...any) { + _, _ = fmt.Fprintln(w, args...) +} + +func writeString(w io.Writer, value string) { + _, _ = io.WriteString(w, value) +} diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 085d30b..cd958cf 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -588,9 +588,9 @@ func TestChecker_CheckAll_500ServerError(t *testing.T) { func TestChecker_CheckAll_HeadFallbackToGet(t *testing.T) { t.Parallel() - var requestCount int32 + var requestCount atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - atomic.AddInt32(&requestCount, 1) + requestCount.Add(1) if r.Method == http.MethodHead { w.WriteHeader(http.StatusMethodNotAllowed) return @@ -606,7 +606,7 @@ func TestChecker_CheckAll_HeadFallbackToGet(t *testing.T) { require.Len(t, results, 1) assert.Equal(t, StatusAlive, results[0].Status) - assert.GreaterOrEqual(t, atomic.LoadInt32(&requestCount), int32(2)) // HEAD then GET + assert.GreaterOrEqual(t, requestCount.Load(), int32(2)) // HEAD then GET } func TestChecker_CheckAll_Redirect301(t *testing.T) { @@ -839,9 +839,9 @@ func TestChecker_CheckAll_MultipleDifferentURLs(t *testing.T) { func TestChecker_CheckAll_RetryOn5xx(t *testing.T) { t.Parallel() - var attempts int32 + var attempts atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - count := atomic.AddInt32(&attempts, 1) + count := attempts.Add(1) if count < 3 { w.WriteHeader(http.StatusServiceUnavailable) return @@ -857,15 +857,15 @@ func TestChecker_CheckAll_RetryOn5xx(t *testing.T) { require.Len(t, results, 1) assert.Equal(t, StatusAlive, results[0].Status) - assert.GreaterOrEqual(t, atomic.LoadInt32(&attempts), int32(3)) + assert.GreaterOrEqual(t, attempts.Load(), int32(3)) } func TestChecker_CheckAll_RetryOn429(t *testing.T) { t.Parallel() - var attempts int32 + var attempts atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - count := atomic.AddInt32(&attempts, 1) + count := attempts.Add(1) if count < 2 { w.WriteHeader(http.StatusTooManyRequests) return @@ -886,9 +886,9 @@ func TestChecker_CheckAll_RetryOn429(t *testing.T) { func TestChecker_CheckAll_NoRetryOn404(t *testing.T) { t.Parallel() - var attempts int32 + var attempts atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - atomic.AddInt32(&attempts, 1) + attempts.Add(1) w.WriteHeader(http.StatusNotFound) })) defer server.Close() @@ -900,7 +900,7 @@ func TestChecker_CheckAll_NoRetryOn404(t *testing.T) { require.Len(t, results, 1) assert.Equal(t, StatusDead, results[0].Status) - assert.Equal(t, int32(1), atomic.LoadInt32(&attempts)) // No retries for 404 + assert.Equal(t, int32(1), attempts.Load()) // No retries for 404 } func TestChecker_Check_ContextCanceled(t *testing.T) { @@ -1020,19 +1020,19 @@ func TestChecker_CheckAll_ConnectionRefused(t *testing.T) { func TestChecker_CheckAll_Concurrency(t *testing.T) { t.Parallel() - var activeRequests int32 - var maxConcurrent int32 + var activeRequests atomic.Int32 + var maxConcurrent atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - current := atomic.AddInt32(&activeRequests, 1) + current := activeRequests.Add(1) for { - old := atomic.LoadInt32(&maxConcurrent) - if current <= old || atomic.CompareAndSwapInt32(&maxConcurrent, old, current) { + old := maxConcurrent.Load() + if current <= old || maxConcurrent.CompareAndSwap(old, current) { break } } time.Sleep(100 * time.Millisecond) - atomic.AddInt32(&activeRequests, -1) + activeRequests.Add(-1) w.WriteHeader(http.StatusOK) })) defer server.Close() @@ -1047,7 +1047,7 @@ func TestChecker_CheckAll_Concurrency(t *testing.T) { results := checker.CheckAll(links) assert.Len(t, results, 20) - assert.LessOrEqual(t, atomic.LoadInt32(&maxConcurrent), int32(5)) + assert.LessOrEqual(t, maxConcurrent.Load(), int32(5)) } // ============================================================================= @@ -1291,9 +1291,9 @@ func TestChecker_CheckAll_RedirectTo403StillBlocked(t *testing.T) { func TestChecker_CheckAll_501NotImplemented(t *testing.T) { t.Parallel() - var requestCount int32 + var requestCount atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - atomic.AddInt32(&requestCount, 1) + requestCount.Add(1) if r.Method == http.MethodHead { w.WriteHeader(http.StatusNotImplemented) // 501 return @@ -1310,7 +1310,7 @@ func TestChecker_CheckAll_501NotImplemented(t *testing.T) { require.Len(t, results, 1) assert.Equal(t, StatusAlive, results[0].Status) // Should have made at least 2 requests (HEAD then GET fallback) - assert.GreaterOrEqual(t, atomic.LoadInt32(&requestCount), int32(2)) + assert.GreaterOrEqual(t, requestCount.Load(), int32(2)) } func TestChecker_CheckAll_RedirectWithRelativeLocation(t *testing.T) { diff --git a/internal/fixer/fixer.go b/internal/fixer/fixer.go index e67cf01..9adf70f 100644 --- a/internal/fixer/fixer.go +++ b/internal/fixer/fixer.go @@ -243,27 +243,26 @@ func (*Fixer) Preview(changes []FileChanges) string { totalFixes += fc.TotalFixes } - b.WriteString(fmt.Sprintf("Found %d fixable redirect(s) across %d file(s):\n\n", - totalFixes, len(changes))) + appendFixf(&b, "Found %d fixable redirect(s) across %d file(s):\n\n", totalFixes, len(changes)) for _, fc := range changes { - b.WriteString(fmt.Sprintf("%s (%d fix(es))\n", fc.FilePath, fc.TotalFixes)) + appendFixf(&b, "%s (%d fix(es))\n", fc.FilePath, fc.TotalFixes) for _, fix := range fc.Fixes { lineInfo := fmt.Sprintf(" Line %d: ", fix.Line) if fix.IsRefDef { - b.WriteString(fmt.Sprintf("%s[%s] %s\n", lineInfo, fix.RefName, fix.OldURL)) - b.WriteString(fmt.Sprintf(" -> %s", fix.NewURL)) + appendFixf(&b, "%s[%s] %s\n", lineInfo, fix.RefName, fix.OldURL) + appendFixf(&b, " -> %s", fix.NewURL) if fix.RefUsages > 0 { - b.WriteString(fmt.Sprintf(" (used %d time(s))", fix.RefUsages)) + appendFixf(&b, " (used %d time(s))", fix.RefUsages) } b.WriteString("\n") } else { - b.WriteString(fmt.Sprintf("%s%s\n", lineInfo, truncateURL(fix.OldURL, 60))) - b.WriteString(fmt.Sprintf(" -> %s", truncateURL(fix.NewURL, 60))) + appendFixf(&b, "%s%s\n", lineInfo, truncateURL(fix.OldURL, 60)) + appendFixf(&b, " -> %s", truncateURL(fix.NewURL, 60)) if fix.Occurrences > 1 { - b.WriteString(fmt.Sprintf(" (%d occurrence(s))", fix.Occurrences)) + appendFixf(&b, " (%d occurrence(s))", fix.Occurrences) } b.WriteString("\n") } @@ -337,6 +336,7 @@ func (*Fixer) ApplyToFile(fc FileChanges) (*FixResult, error) { } // Write modified content back to file + //nolint:gosec // fc.FilePath originates from files already scanned in the current workspace err = os.WriteFile(fc.FilePath, []byte(modifiedContent), 0o600) if err != nil { result.Error = fmt.Errorf("writing file: %w", err) @@ -382,16 +382,16 @@ func Summary(results []FixResult) string { return "No changes made." } - b.WriteString(fmt.Sprintf("Fixed %d redirect(s) across %d file(s).\n", totalApplied, filesModified)) + appendFixf(&b, "Fixed %d redirect(s) across %d file(s).\n", totalApplied, filesModified) if totalSkipped > 0 { - b.WriteString(fmt.Sprintf("Skipped %d (URL not found in file).\n", totalSkipped)) + appendFixf(&b, "Skipped %d (URL not found in file).\n", totalSkipped) } if len(errors) > 0 { b.WriteString("\nErrors:\n") for _, e := range errors { - b.WriteString(fmt.Sprintf(" %s\n", e)) + appendFixf(&b, " %s\n", e) } } @@ -416,7 +416,7 @@ func DetailedSummary(results []FixResult) string { return "No changes made." } - b.WriteString(fmt.Sprintf("Fixed %d redirect(s) across %d file(s):\n\n", totalApplied, filesModified)) + appendFixf(&b, "Fixed %d redirect(s) across %d file(s):\n\n", totalApplied, filesModified) for _, r := range results { if r.Applied == 0 { @@ -424,11 +424,15 @@ func DetailedSummary(results []FixResult) string { } for _, change := range r.ChangedURLs { - b.WriteString(fmt.Sprintf(" %s:%d\n", r.FilePath, change.Line)) - b.WriteString(fmt.Sprintf(" %s\n", truncateURL(change.OldURL, 70))) - b.WriteString(fmt.Sprintf(" -> %s\n", truncateURL(change.NewURL, 70))) + appendFixf(&b, " %s:%d\n", r.FilePath, change.Line) + appendFixf(&b, " %s\n", truncateURL(change.OldURL, 70)) + appendFixf(&b, " -> %s\n", truncateURL(change.NewURL, 70)) } } return b.String() } + +func appendFixf(b *strings.Builder, format string, args ...any) { + _, _ = fmt.Fprintf(b, format, args...) +} diff --git a/internal/parser/parser.go b/internal/parser/parser.go index cc817bb..cce3ad9 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -254,7 +254,6 @@ func extractLinksParallelWithRegistry(filePaths []string, strict bool) ([]Link, futures := make([]pond.ResultTask[fileResult], 0, len(filePaths)) for _, path := range filePaths { - path := path futures = append(futures, pool.Submit(func() fileResult { links, err := ExtractLinksWithRegistry(path, strict) return fileResult{links: links, err: err} diff --git a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go index d6aaa71..fc3f8cb 100644 --- a/internal/scanner/scanner_test.go +++ b/internal/scanner/scanner_test.go @@ -41,7 +41,7 @@ func TestFindFiles(t *testing.T) { assert.Len(t, files, 2) // Should find both root.md and nested.md - var names []string + names := make([]string, 0, len(files)) for _, f := range files { names = append(names, filepath.Base(f)) } @@ -76,7 +76,7 @@ func TestFindFiles(t *testing.T) { assert.Len(t, files, 3) // Should find .md, .MD, and .Md - var names []string + names := make([]string, 0, len(files)) for _, f := range files { names = append(names, filepath.Base(f)) } @@ -197,7 +197,7 @@ func TestFindFilesByTypes(t *testing.T) { assert.Len(t, files, 3) // Collect extensions found - var extensions []string + extensions := make([]string, 0, len(files)) for _, f := range files { extensions = append(extensions, filepath.Ext(f)) } @@ -218,7 +218,7 @@ func TestFindFilesByTypes(t *testing.T) { require.NoError(t, err) assert.Len(t, files, 2) - var extensions []string + extensions := make([]string, 0, len(files)) for _, f := range files { extensions = append(extensions, filepath.Ext(f)) } diff --git a/internal/stats/stats.go b/internal/stats/stats.go index d5bf0ed..c818341 100644 --- a/internal/stats/stats.go +++ b/internal/stats/stats.go @@ -179,51 +179,55 @@ func (s *Stats) String() string { // Timing breakdown b.WriteString("Timing:\n") - b.WriteString(fmt.Sprintf(" Scan files: %8s", FormatDuration(s.ScanDuration()))) + appendf(&b, " Scan files: %8s", FormatDuration(s.ScanDuration())) if total > 0 { - b.WriteString(fmt.Sprintf(" (%4.1f%%)", float64(s.ScanDuration())/float64(total)*100)) + appendf(&b, " (%4.1f%%)", float64(s.ScanDuration())/float64(total)*100) } b.WriteString("\n") - b.WriteString(fmt.Sprintf(" Parse links: %8s", FormatDuration(s.ParseDuration()))) + appendf(&b, " Parse links: %8s", FormatDuration(s.ParseDuration())) if total > 0 { - b.WriteString(fmt.Sprintf(" (%4.1f%%)", float64(s.ParseDuration())/float64(total)*100)) + appendf(&b, " (%4.1f%%)", float64(s.ParseDuration())/float64(total)*100) } b.WriteString("\n") - b.WriteString(fmt.Sprintf(" Check URLs: %8s", FormatDuration(s.CheckDuration()))) + appendf(&b, " Check URLs: %8s", FormatDuration(s.CheckDuration())) if total > 0 { - b.WriteString(fmt.Sprintf(" (%4.1f%%)", float64(s.CheckDuration())/float64(total)*100)) + appendf(&b, " (%4.1f%%)", float64(s.CheckDuration())/float64(total)*100) } b.WriteString("\n") b.WriteString(" ─────────────────────────\n") - b.WriteString(fmt.Sprintf(" Total: %8s\n", FormatDuration(total))) + appendf(&b, " Total: %8s\n", FormatDuration(total)) // Throughput b.WriteString("\nThroughput:\n") - b.WriteString(fmt.Sprintf(" Files scanned: %5d\n", s.FilesScanned)) - b.WriteString(fmt.Sprintf(" Links found: %5d\n", s.LinksFound)) - b.WriteString(fmt.Sprintf(" Unique URLs: %5d\n", s.UniqueURLs)) + appendf(&b, " Files scanned: %5d\n", s.FilesScanned) + appendf(&b, " Links found: %5d\n", s.LinksFound) + appendf(&b, " Unique URLs: %5d\n", s.UniqueURLs) if s.Duplicates > 0 { - b.WriteString(fmt.Sprintf(" Duplicates: %5d\n", s.Duplicates)) + appendf(&b, " Duplicates: %5d\n", s.Duplicates) } if s.Ignored > 0 { - b.WriteString(fmt.Sprintf(" Ignored: %5d\n", s.Ignored)) + appendf(&b, " Ignored: %5d\n", s.Ignored) } - b.WriteString(fmt.Sprintf(" URLs/second: %5.1f\n", s.URLsPerSecond())) - b.WriteString(fmt.Sprintf(" Avg response: %7s\n", FormatDuration(s.AvgResponseTime()))) + appendf(&b, " URLs/second: %5.1f\n", s.URLsPerSecond()) + appendf(&b, " Avg response: %7s\n", FormatDuration(s.AvgResponseTime())) // Memory b.WriteString("\nMemory:\n") - b.WriteString(fmt.Sprintf(" Heap in use: %8s\n", FormatBytes(s.HeapAlloc))) - b.WriteString(fmt.Sprintf(" Total alloc: %8s\n", FormatBytes(s.TotalAlloc))) - b.WriteString(fmt.Sprintf(" GC cycles: %8d\n", s.NumGC)) - b.WriteString(fmt.Sprintf(" Goroutines: %8d\n", s.NumGoroutine)) + appendf(&b, " Heap in use: %8s\n", FormatBytes(s.HeapAlloc)) + appendf(&b, " Total alloc: %8s\n", FormatBytes(s.TotalAlloc)) + appendf(&b, " GC cycles: %8d\n", s.NumGC) + appendf(&b, " Goroutines: %8d\n", s.NumGoroutine) return b.String() } +func appendf(b *strings.Builder, format string, args ...any) { + _, _ = fmt.Fprintf(b, format, args...) +} + // ToJSON returns a map suitable for JSON serialization. func (s *Stats) ToJSON() map[string]any { return map[string]any{ diff --git a/internal/ui/app.go b/internal/ui/app.go index 79ee77c..6537ceb 100644 --- a/internal/ui/app.go +++ b/internal/ui/app.go @@ -89,6 +89,7 @@ type Model struct { strictMode bool scanInclude []string scanExclude []string + checkerOpts checker.Options // Data files []string @@ -129,6 +130,7 @@ type Model struct { func New( path string, urlFilter *filter.Filter, fileTypes []string, strictMode bool, scanInclude, scanExclude []string, + checkerOpts ...checker.Options, ) Model { if path == "" { path = "." @@ -136,6 +138,10 @@ func New( if len(fileTypes) == 0 { fileTypes = []string{"md"} } + opts := checker.DefaultOptions() + if len(checkerOpts) > 0 { + opts = checkerOpts[0] + } // Initialize spinner s := spinner.New() @@ -174,6 +180,7 @@ func New( strictMode: strictMode, scanInclude: scanInclude, scanExclude: scanExclude, + checkerOpts: opts, } } @@ -301,7 +308,7 @@ func (m *Model) handleLinksExtracted(msg LinksExtractedMsg) (tea.Model, tea.Cmd) return m, nil } m.state = stateChecking - return m, StartCheckingCmd(m.links, &m.checkerState) + return m, StartCheckingCmd(m.links, &m.checkerState, m.checkerOpts) } // countUniqueURLsFromLinks counts unique URLs in a slice of checker.Link. @@ -356,7 +363,7 @@ func (m *Model) getFilteredResults() []checker.Result { switch m.filter { case filterAll: // All non-alive: warnings + dead + duplicates - var all []checker.Result + all := make([]checker.Result, 0, len(m.warningLinks)+len(m.deadLinks)+len(m.duplicateLinks)) all = append(all, m.warningLinks...) all = append(all, m.deadLinks...) all = append(all, m.duplicateLinks...) diff --git a/internal/ui/app_test.go b/internal/ui/app_test.go index b920255..8c0642a 100644 --- a/internal/ui/app_test.go +++ b/internal/ui/app_test.go @@ -139,9 +139,15 @@ func TestGetFilteredResultsAndView(t *testing.T) { model.files = []string{"README.md"} model.links = []checker.Link{{URL: "https://alive.example"}} model.aliveLinks = []checker.Result{{Status: checker.StatusAlive, Link: checker.Link{URL: "https://alive.example"}}} - model.warningLinks = []checker.Result{{Status: checker.StatusRedirect, Link: checker.Link{URL: "https://warn.example"}}} + model.warningLinks = []checker.Result{{ + Status: checker.StatusRedirect, + Link: checker.Link{URL: "https://warn.example"}, + }} model.deadLinks = []checker.Result{{Status: checker.StatusDead, Link: checker.Link{URL: "https://dead.example"}}} - model.duplicateLinks = []checker.Result{{Status: checker.StatusDuplicate, Link: checker.Link{URL: "https://dup.example"}}} + model.duplicateLinks = []checker.Result{{ + Status: checker.StatusDuplicate, + Link: checker.Link{URL: "https://dup.example"}, + }} assert.Len(t, model.getFilteredResults(), 3) model.filter = filterDead diff --git a/internal/ui/commands.go b/internal/ui/commands.go index 2d9ac40..2fd6fc6 100644 --- a/internal/ui/commands.go +++ b/internal/ui/commands.go @@ -73,22 +73,31 @@ type CheckerState struct { } // StartCheckingCmd initializes the checker and returns the first result. -func StartCheckingCmd(links []checker.Link, state *CheckerState) tea.Cmd { +func StartCheckingCmd( + links []checker.Link, + state *CheckerState, + opts checker.Options, +) tea.Cmd { return func() tea.Msg { - // Create a cancellable context ctx, cancel := context.WithCancel(context.Background()) state.CancelFunc = cancel - // Create checker with default options - opts := checker.DefaultOptions() c := checker.New(opts) - // Start checking and store the channel - state.ResultsChan = c.Check(ctx, links) + sourceResults := c.Check(ctx, links) + results := make(chan checker.Result, max(opts.Concurrency, 1)) + go func() { + defer close(results) + defer cancel() + for result := range sourceResults { + results <- result + } + }() + state.ResultsChan = results - // Get the first result result, ok := <-state.ResultsChan if !ok { + state.ResultsChan = nil return AllChecksCompleteMsg{} } return LinkCheckedMsg{Result: result} @@ -104,6 +113,11 @@ func WaitForNextResultCmd(state *CheckerState) tea.Cmd { result, ok := <-state.ResultsChan if !ok { + if state.CancelFunc != nil { + state.CancelFunc() + state.CancelFunc = nil + } + state.ResultsChan = nil return AllChecksCompleteMsg{} } return LinkCheckedMsg{Result: result} diff --git a/internal/ui/item.go b/internal/ui/item.go index 2cff7a0..2a07ccf 100644 --- a/internal/ui/item.go +++ b/internal/ui/item.go @@ -97,67 +97,71 @@ func (i ResultItem) DetailView() string { b.WriteString("┌─ Details ─────────────────────────────────────────────────────────────\n") // Status line - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Status:"), StatusBadge(r.Status))) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Status:"), StatusBadge(r.Status)) // Status-specific details switch r.Status { case checker.StatusAlive: - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode)) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode) case checker.StatusRedirect: - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("Original:"), r.StatusCode)) - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Chain:"), formatRedirectChain(r))) - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Final URL:"), r.FinalURL)) - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("Final Status:"), r.FinalStatus)) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("Original:"), r.StatusCode) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Chain:"), formatRedirectChain(r)) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Final URL:"), r.FinalURL) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("Final Status:"), r.FinalStatus) b.WriteString("│\n") - b.WriteString(fmt.Sprintf("│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description()))) + appendItemf(&b, "│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description())) case checker.StatusBlocked: - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode)) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode) b.WriteString("│\n") - b.WriteString(fmt.Sprintf("│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description()))) + appendItemf(&b, "│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description())) case checker.StatusDead: if r.StatusCode > 0 { - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode)) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("HTTP Code:"), r.StatusCode) } if len(r.RedirectChain) > 0 { - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Chain:"), formatRedirectChain(r))) - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Final URL:"), r.FinalURL)) - b.WriteString(fmt.Sprintf("│ %s %d\n", DetailLabelStyle.Render("Final Status:"), r.FinalStatus)) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Chain:"), formatRedirectChain(r)) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Final URL:"), r.FinalURL) + appendItemf(&b, "│ %s %d\n", DetailLabelStyle.Render("Final Status:"), r.FinalStatus) } if r.Error != "" { - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Error:"), r.Error)) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Error:"), r.Error) } case checker.StatusError: - b.WriteString(fmt.Sprintf("│ %s %s\n", DetailLabelStyle.Render("Error:"), r.Error)) + appendItemf(&b, "│ %s %s\n", DetailLabelStyle.Render("Error:"), r.Error) case checker.StatusDuplicate: if r.DuplicateOf != nil { - b.WriteString(fmt.Sprintf("│ %s %s", DetailLabelStyle.Render("First found:"), r.DuplicateOf.Link.FilePath)) + appendItemf(&b, "│ %s %s", DetailLabelStyle.Render("First found:"), r.DuplicateOf.Link.FilePath) if r.DuplicateOf.Link.Line > 0 { - b.WriteString(fmt.Sprintf(" (line %d)", r.DuplicateOf.Link.Line)) + appendItemf(&b, " (line %d)", r.DuplicateOf.Link.Line) } b.WriteString("\n") - b.WriteString(fmt.Sprintf("│ %s %s\n", - DetailLabelStyle.Render("Original status:"), StatusBadge(r.DuplicateOf.Status))) + appendItemf( + &b, + "│ %s %s\n", + DetailLabelStyle.Render("Original status:"), + StatusBadge(r.DuplicateOf.Status), + ) } b.WriteString("│\n") - b.WriteString(fmt.Sprintf("│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description()))) + appendItemf(&b, "│ %s\n", DetailNoteStyle.Render("Note: "+r.Status.Description())) } // Link text if available if text := truncateText(r.Link.Text, 60); text != "" { b.WriteString("│\n") - b.WriteString(fmt.Sprintf("│ %s %q\n", DetailLabelStyle.Render("Text:"), text)) + appendItemf(&b, "│ %s %q\n", DetailLabelStyle.Render("Text:"), text) } // File location b.WriteString("│\n") - b.WriteString(fmt.Sprintf("│ %s %s", DetailLabelStyle.Render("File:"), r.Link.FilePath)) + appendItemf(&b, "│ %s %s", DetailLabelStyle.Render("File:"), r.Link.FilePath) if r.Link.Line > 0 { - b.WriteString(fmt.Sprintf(" (line %d)", r.Link.Line)) + appendItemf(&b, " (line %d)", r.Link.Line) } b.WriteString("\n") @@ -166,6 +170,10 @@ func (i ResultItem) DetailView() string { return b.String() } +func appendItemf(b *strings.Builder, format string, args ...any) { + _, _ = fmt.Fprintf(b, format, args...) +} + // formatRedirectChain formats the redirect chain for display. func formatRedirectChain(r checker.Result) string { if len(r.RedirectChain) == 0 { From 3f326b853bfca9046b1eab1d54921e2ed4e67ad0 Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Mon, 13 Apr 2026 08:41:03 +0200 Subject: [PATCH 04/13] ci: fix bounded fuzz workflow --- .github/workflows/test.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e299a98..0bcb582 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,4 +27,12 @@ jobs: run: go test -tags=integration ./cmd - name: Run bounded fuzz jobs - run: go test ./internal/parser/... -run=^$ -fuzz=Fuzz -fuzztime=5s + run: | + set -euo pipefail + go test ./internal/parser -run=^$ -fuzz=FuzzCleanURLTrailing -fuzztime=5s + go test ./internal/parser -run=^$ -fuzz=FuzzBuildLineIndexAndOffsetToLineCol -fuzztime=5s + go test ./internal/parser/json -run=^$ -fuzz=FuzzJSONParserValidateAndParse -fuzztime=5s + go test ./internal/parser/markdown -run=^$ -fuzz=FuzzExtractLinksFromContent -fuzztime=5s + go test ./internal/parser/toml -run=^$ -fuzz=FuzzTOMLParserValidateAndParse -fuzztime=5s + go test ./internal/parser/xml -run=^$ -fuzz=FuzzXMLParserValidateAndParse -fuzztime=5s + go test ./internal/parser/yaml -run=^$ -fuzz=FuzzYAMLParserValidateAndParse -fuzztime=5s From 738de4947c1cc0db8a7b07934721b20ca7ca1790 Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Sun, 17 May 2026 17:26:35 +0200 Subject: [PATCH 05/13] fix(security): skip symlinks in scanner and refuse symlink writes in fixer Scanner now skips symlinks during WalkDir (via d.Type()&os.ModeSymlink) to prevent following links that point outside the user-specified scan root. Fixer now refuses to rewrite a path whose Lstat reports a symlink, preventing an attacker-controlled symlink from redirecting writes onto arbitrary files (e.g. ~/.ssh/authorized_keys) when the user runs 'gone fix' over a directory they don't fully control. Tests cover both behaviours, with Unix-only skips. --- internal/fixer/fixer.go | 13 +++++++-- internal/fixer/fixer_test.go | 47 ++++++++++++++++++++++++++++++++ internal/scanner/scanner.go | 7 ++++- internal/scanner/scanner_test.go | 30 ++++++++++++++++++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/internal/fixer/fixer.go b/internal/fixer/fixer.go index 9adf70f..c90c901 100644 --- a/internal/fixer/fixer.go +++ b/internal/fixer/fixer.go @@ -288,6 +288,15 @@ func (*Fixer) ApplyToFile(fc FileChanges) (*FixResult, error) { ChangedURLs: []URLChange{}, } + // Refuse to operate on symlinks. The scanner already filters them, but + // re-checking here prevents path-traversal in any code path that builds + // FileChanges from another source: a symlink inside the workspace could + // otherwise be used to read or overwrite a file elsewhere on disk. + if info, lerr := os.Lstat(fc.FilePath); lerr == nil && info.Mode()&os.ModeSymlink != 0 { + result.Error = fmt.Errorf("refusing to fix symlinked path: %s", fc.FilePath) + return result, result.Error + } + // Read file content content, err := os.ReadFile(fc.FilePath) if err != nil { @@ -335,8 +344,8 @@ func (*Fixer) ApplyToFile(fc FileChanges) (*FixResult, error) { return result, nil } - // Write modified content back to file - //nolint:gosec // fc.FilePath originates from files already scanned in the current workspace + // Write modified content back to file. The scanner rejects symlinks, so + // fc.FilePath is expected to point to a regular file inside the scan root. err = os.WriteFile(fc.FilePath, []byte(modifiedContent), 0o600) if err != nil { result.Error = fmt.Errorf("writing file: %w", err) diff --git a/internal/fixer/fixer_test.go b/internal/fixer/fixer_test.go index 8419d4a..b3ccf3a 100644 --- a/internal/fixer/fixer_test.go +++ b/internal/fixer/fixer_test.go @@ -3,6 +3,7 @@ package fixer import ( "os" "path/filepath" + "runtime" "testing" "github.com/stretchr/testify/assert" @@ -569,6 +570,52 @@ func TestFixer_ApplyToFile_FileNotFound(t *testing.T) { assert.Contains(t, result.Error.Error(), "reading file") } +func TestFixer_ApplyToFile_RefusesSymlink(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("symlinks require elevated privileges on Windows") + } + + // Simulates the attack scenario: an attacker-controlled repository + // contains a symlink whose target sits outside the workspace. The fixer + // must refuse to follow it, otherwise os.WriteFile would overwrite the + // target file on the victim's machine. + workspace := t.TempDir() + outsideDir := t.TempDir() + + outsideFile := filepath.Join(outsideDir, "victim.md") + originalContent := "Important file. https://old.com is referenced here." + require.NoError(t, os.WriteFile(outsideFile, []byte(originalContent), 0o600)) + + linkPath := filepath.Join(workspace, "innocent.md") + require.NoError(t, os.Symlink(outsideFile, linkPath)) + + changes := FileChanges{ + FilePath: linkPath, + Fixes: []Fix{ + { + FilePath: linkPath, + OldURL: "https://old.com", + NewURL: "https://attacker-controlled.example", + }, + }, + } + + f := New() + result, err := f.ApplyToFile(changes) + + require.Error(t, err) + assert.Contains(t, err.Error(), "symlink") + assert.Equal(t, 0, result.Applied) + + // The file outside the workspace must be byte-identical to its + // original content. + got, rerr := os.ReadFile(outsideFile) + require.NoError(t, rerr) + assert.Equal(t, originalContent, string(got), "symlink target must not be modified") +} + func TestFixer_ApplyToFile_NoChanges(t *testing.T) { t.Parallel() diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 0234be8..beb6007 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -41,8 +41,13 @@ func FindFiles(root string, extensions []string) ([]string, error) { return filepath.SkipDir } - // Check if this file has a matching extension + // Check if this file has a matching extension. + // Skip symlinks so that subsequent reads/writes cannot escape the + // scan root by following a link to a file elsewhere on disk. if !d.IsDir() { + if d.Type()&os.ModeSymlink != 0 { + return nil + } ext := strings.ToLower(filepath.Ext(d.Name())) if normalizedExts[ext] { files = append(files, path) diff --git a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go index fc3f8cb..325c009 100644 --- a/internal/scanner/scanner_test.go +++ b/internal/scanner/scanner_test.go @@ -163,6 +163,36 @@ func TestFindFiles(t *testing.T) { assert.Contains(t, files[0], "root.md") }) + t.Run("SymlinkedFileIsSkipped", func(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("symlinks require elevated privileges on Windows") + } + + // A symlinked file inside the scan root that points to a file + // outside the root must not be returned. Returning it would allow + // later read/write operations to follow the link and escape the + // workspace. + workspace := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(workspace, "real.md"), []byte("# Real"), 0o644)) + + outside := t.TempDir() + outsideFile := filepath.Join(outside, "secret.md") + require.NoError(t, os.WriteFile(outsideFile, []byte("# Secret"), 0o644)) + + linkPath := filepath.Join(workspace, "linked.md") + require.NoError(t, os.Symlink(outsideFile, linkPath)) + + files, err := FindFiles(workspace, []string{".md"}) + require.NoError(t, err) + assert.Len(t, files, 1) + assert.Contains(t, files[0], "real.md") + for _, f := range files { + assert.NotEqual(t, linkPath, f, "symlinked file must not be returned") + } + }) + t.Run("UnreadableDirectoryReturnsError", func(t *testing.T) { t.Parallel() From 15f57df492d42c1f068eef5de5f034a9b8209e5b Mon Sep 17 00:00:00 2001 From: Leonardo Maldonado Date: Sun, 17 May 2026 17:26:51 +0200 Subject: [PATCH 06/13] fix(security): block SSRF to private, loopback, link-local and metadata ranges Default-deny outbound TCP to dangerous CIDRs (loopback 127.0.0.0/8 & ::1, RFC1918, link-local incl. 169.254.169.254 IMDS, IPv6 unique-local fc00::/7, multicast, unspecified). Defense in depth: 1. validateURL rejects http/https URLs whose literal host resolves into a blocked range and rejects non-http(s) schemes. 2. safeDialContext wraps the underlying net.Dialer.DialContext so hostnames whose DNS resolution lands in a blocked range are refused at TCP-dial time. This catches DNS rebinding because the check happens after resolution, on each connection. 3. followRedirectChain re-validates every hop, so a public URL that 302s to http://169.254.169.254/latest/meta-data is blocked. Users with legitimate intranet docs can opt in via --allow-private-hosts or output.allow_private_hosts: true in .gonerc.yaml. Test suites that hit httptest.NewServer (127.0.0.1) explicitly opt in via WithAllowPrivateHosts(true) so the production default stays default-deny. Adds internal/checker/safety_test.go with table-driven coverage: blocked-IP set, scheme rejection, URL validator, safe dial wrapper covering blocked literal IPs and DNS that resolves into private space, and end-to-end redirect-chain refusal. --- cmd/check.go | 17 +- cmd/check_runner.go | 75 +++---- cmd/e2e_bench_test.go | 2 + cmd/fix.go | 14 +- cmd/fix_runner.go | 55 +++--- cmd/helpers.go | 19 +- cmd/helpers_test.go | 2 + cmd/integration_test.go | 6 +- cmd/interactive.go | 14 +- cmd/interactive_runner.go | 27 +-- internal/checker/checker.go | 22 ++- internal/checker/checker_test.go | 64 +++--- internal/checker/options.go | 14 ++ internal/checker/safety.go | 160 +++++++++++++++ internal/checker/safety_test.go | 322 +++++++++++++++++++++++++++++++ internal/config/config.go | 7 + 16 files changed, 693 insertions(+), 127 deletions(-) create mode 100644 internal/checker/safety.go create mode 100644 internal/checker/safety_test.go diff --git a/cmd/check.go b/cmd/check.go index 754f227..860ceeb 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -30,11 +30,12 @@ var ( strictMode bool // Ignore flags. - ignoreDomains []string - ignorePatterns []string - ignoreRegex []string - showIgnored bool - noConfig bool + ignoreDomains []string + ignorePatterns []string + ignoreRegex []string + showIgnored bool + noConfig bool + allowPrivateHosts bool ) // checkCmd represents the check command. @@ -144,6 +145,12 @@ func init() { "Show which URLs were ignored and why") checkCmd.Flags().BoolVar(&noConfig, "no-config", false, "Skip loading .gonerc.yaml config file") + + // Security options + checkCmd.Flags().BoolVar(&allowPrivateHosts, "allow-private-hosts", false, + "Allow requests to loopback, private, link-local and reserved IP "+ + "ranges. Default is to block them to prevent SSRF when scanning "+ + "untrusted documents.") } // runCheck is the main entry point for the check command. diff --git a/cmd/check_runner.go b/cmd/check_runner.go index a303238..f368ce2 100644 --- a/cmd/check_runner.go +++ b/cmd/check_runner.go @@ -11,23 +11,24 @@ import ( ) type checkOptions struct { - OutputFormat string - OutputFile string - Concurrency int - Timeout int - Retries int - ShowAlive bool - ShowWarnings bool - ShowDead bool - ShowAll bool - ShowStats bool - FileTypes []string - StrictMode bool - IgnoreDomains []string - IgnorePatterns []string - IgnoreRegex []string - ShowIgnored bool - NoConfig bool + OutputFormat string + OutputFile string + Concurrency int + Timeout int + Retries int + ShowAlive bool + ShowWarnings bool + ShowDead bool + ShowAll bool + ShowStats bool + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + ShowIgnored bool + NoConfig bool + AllowPrivateHosts bool } type checkRunner struct { @@ -46,23 +47,24 @@ func newCheckRunner(opts checkOptions, env CommandEnv, streams IOStreams) *check func currentCheckOptions() checkOptions { return checkOptions{ - OutputFormat: outputFormat, - OutputFile: outputFile, - Concurrency: concurrency, - Timeout: timeout, - Retries: retries, - ShowAlive: showAlive, - ShowWarnings: showWarnings, - ShowDead: showDead, - ShowAll: showAll, - ShowStats: showStats, - FileTypes: append([]string{}, fileTypes...), - StrictMode: strictMode, - IgnoreDomains: append([]string{}, ignoreDomains...), - IgnorePatterns: append([]string{}, ignorePatterns...), - IgnoreRegex: append([]string{}, ignoreRegex...), - ShowIgnored: showIgnored, - NoConfig: noConfig, + OutputFormat: outputFormat, + OutputFile: outputFile, + Concurrency: concurrency, + Timeout: timeout, + Retries: retries, + ShowAlive: showAlive, + ShowWarnings: showWarnings, + ShowDead: showDead, + ShowAll: showAll, + ShowStats: showStats, + FileTypes: append([]string{}, fileTypes...), + StrictMode: strictMode, + IgnoreDomains: append([]string{}, ignoreDomains...), + IgnorePatterns: append([]string{}, ignorePatterns...), + IgnoreRegex: append([]string{}, ignoreRegex...), + ShowIgnored: showIgnored, + NoConfig: noConfig, + AllowPrivateHosts: allowPrivateHosts, } } @@ -244,7 +246,10 @@ func (r *checkRunner) checkLinksWithConfig( links []checker.Link, cfg *LoadedConfig, perf *stats.Stats, ) ([]checker.Result, checker.Summary) { perf.StartCheck() - c := r.env.NewChecker(cfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries)) + c := r.env.NewChecker(cfg.BuildCheckerOptions( + r.opts.Concurrency, r.opts.Timeout, r.opts.Retries, + r.opts.AllowPrivateHosts, + )) results := c.CheckAll(links) summary := checker.Summarize(results) perf.EndCheck() diff --git a/cmd/e2e_bench_test.go b/cmd/e2e_bench_test.go index 1f2df3d..226da0c 100644 --- a/cmd/e2e_bench_test.go +++ b/cmd/e2e_bench_test.go @@ -100,6 +100,7 @@ func BenchmarkPipeline_FullCheck(b *testing.B) { c := checker.New( checker.DefaultOptions(). + WithAllowPrivateHosts(true). WithConcurrency(16). WithTimeout(2 * time.Second). WithMaxRetries(0), @@ -135,6 +136,7 @@ func BenchmarkPipeline_FixDryRun(b *testing.B) { c := checker.New( checker.DefaultOptions(). + WithAllowPrivateHosts(true). WithConcurrency(16). WithTimeout(2 * time.Second). WithMaxRetries(0), diff --git a/cmd/fix.go b/cmd/fix.go index 3086f1c..5e86cec 100644 --- a/cmd/fix.go +++ b/cmd/fix.go @@ -22,10 +22,11 @@ var ( fixStrictMode bool // Ignore flags (shared with check). - fixIgnoreDomains []string - fixIgnorePatterns []string - fixIgnoreRegex []string - fixNoConfig bool + fixIgnoreDomains []string + fixIgnorePatterns []string + fixIgnoreRegex []string + fixNoConfig bool + fixAllowPrivateHosts bool ) // fixCmd represents the fix command. @@ -100,6 +101,11 @@ func init() { "Regex patterns to ignore (can be repeated)") fixCmd.Flags().BoolVar(&fixNoConfig, "no-config", false, "Skip loading .gonerc.yaml config file") + + // Security options + fixCmd.Flags().BoolVar(&fixAllowPrivateHosts, "allow-private-hosts", false, + "Allow requests to loopback, private, link-local and reserved IP "+ + "ranges. Default is to block them to prevent SSRF.") } // runFix is the main entry point for the fix command. diff --git a/cmd/fix_runner.go b/cmd/fix_runner.go index 0ddfcc9..7821370 100644 --- a/cmd/fix_runner.go +++ b/cmd/fix_runner.go @@ -10,18 +10,19 @@ import ( ) type fixOptions struct { - Yes bool - DryRun bool - Concurrency int - Timeout int - Retries int - ShowStats bool - FileTypes []string - StrictMode bool - IgnoreDomains []string - IgnorePatterns []string - IgnoreRegex []string - NoConfig bool + Yes bool + DryRun bool + Concurrency int + Timeout int + Retries int + ShowStats bool + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + NoConfig bool + AllowPrivateHosts bool } type fixRunner struct { @@ -40,18 +41,19 @@ func newFixRunner(opts fixOptions, env CommandEnv, streams IOStreams) *fixRunner func currentFixOptions() fixOptions { return fixOptions{ - Yes: fixYes, - DryRun: fixDryRun, - Concurrency: fixConcurrency, - Timeout: fixTimeout, - Retries: fixRetries, - ShowStats: fixShowStats, - FileTypes: append([]string{}, fixFileTypes...), - StrictMode: fixStrictMode, - IgnoreDomains: append([]string{}, fixIgnoreDomains...), - IgnorePatterns: append([]string{}, fixIgnorePatterns...), - IgnoreRegex: append([]string{}, fixIgnoreRegex...), - NoConfig: fixNoConfig, + Yes: fixYes, + DryRun: fixDryRun, + Concurrency: fixConcurrency, + Timeout: fixTimeout, + Retries: fixRetries, + ShowStats: fixShowStats, + FileTypes: append([]string{}, fixFileTypes...), + StrictMode: fixStrictMode, + IgnoreDomains: append([]string{}, fixIgnoreDomains...), + IgnorePatterns: append([]string{}, fixIgnorePatterns...), + IgnoreRegex: append([]string{}, fixIgnoreRegex...), + NoConfig: fixNoConfig, + AllowPrivateHosts: fixAllowPrivateHosts, } } @@ -126,7 +128,10 @@ func (r *fixRunner) Run(args []string) int { perf.StartCheck() results := r.env.NewChecker( - loadedCfg.BuildCheckerOptions(r.opts.Concurrency, r.opts.Timeout, r.opts.Retries), + loadedCfg.BuildCheckerOptions( + r.opts.Concurrency, r.opts.Timeout, r.opts.Retries, + r.opts.AllowPrivateHosts, + ), ).CheckAll(links) perf.EndCheck() diff --git a/cmd/helpers.go b/cmd/helpers.go index 71b86b6..8b9393d 100644 --- a/cmd/helpers.go +++ b/cmd/helpers.go @@ -94,6 +94,15 @@ func (lc *LoadedConfig) GetTimeout(cliValue, defaultValue int) int { return defaultValue } +// GetAllowPrivateHosts returns the effective allow-private-hosts flag. +// CLI true overrides config; otherwise the config value is used. +func (lc *LoadedConfig) GetAllowPrivateHosts(cliValue bool) bool { + if cliValue { + return true + } + return lc.cfg.Check.AllowPrivateHosts +} + // GetRetries returns the effective retry count. // CLI overrides config if it differs from the default. func (lc *LoadedConfig) GetRetries(cliValue, defaultValue int) int { @@ -161,13 +170,19 @@ func (lc *LoadedConfig) GetShowStats(cliValue bool) bool { } // BuildCheckerOptions creates checker.Options from config and CLI values. -func (lc *LoadedConfig) BuildCheckerOptions(cliConcurrency, cliTimeout, cliRetries int) checker.Options { +// cliAllowPrivate reflects the --allow-private-hosts flag; when true the +// checker is permitted to contact loopback, private, and reserved IPs. +func (lc *LoadedConfig) BuildCheckerOptions( + cliConcurrency, cliTimeout, cliRetries int, + cliAllowPrivate bool, +) checker.Options { defaultOpts := checker.DefaultOptions() return defaultOpts. WithConcurrency(lc.GetConcurrency(cliConcurrency, checker.DefaultConcurrency)). WithTimeout(time.Duration(lc.GetTimeout(cliTimeout, int(checker.DefaultTimeout.Seconds()))) * time.Second). - WithMaxRetries(lc.GetRetries(cliRetries, checker.DefaultMaxRetries)) + WithMaxRetries(lc.GetRetries(cliRetries, checker.DefaultMaxRetries)). + WithAllowPrivateHosts(lc.GetAllowPrivateHosts(cliAllowPrivate)) } // BuildScanOptions creates scanner.ScanOptions from config and path. diff --git a/cmd/helpers_test.go b/cmd/helpers_test.go index 9b74d36..6fb6303 100644 --- a/cmd/helpers_test.go +++ b/cmd/helpers_test.go @@ -111,10 +111,12 @@ func TestLoadedConfig_GettersAndBuilders(t *testing.T) { checker.DefaultConcurrency, int(checker.DefaultTimeout.Seconds()), checker.DefaultMaxRetries, + false, ) assert.Equal(t, 12, opts.Concurrency) assert.Equal(t, 7*time.Second, opts.Timeout) assert.Equal(t, 4, opts.MaxRetries) + assert.False(t, opts.AllowPrivateHosts) scanOpts := loaded.BuildScanOptions("/tmp/docs", []string{"md"}, []string{"md"}) assert.Equal(t, "/tmp/docs", scanOpts.Root) diff --git a/cmd/integration_test.go b/cmd/integration_test.go index 0739e00..8481549 100644 --- a/cmd/integration_test.go +++ b/cmd/integration_test.go @@ -91,7 +91,7 @@ func TestCheck_WritesOutputFile(t *testing.T) { )) reportPath := filepath.Join(tmpDir, "report.json") - result := runGone(t, tmpDir, "check", ".", "--output", reportPath, "--no-config") + result := runGone(t, tmpDir, "check", ".", "--output", reportPath, "--no-config", "--allow-private-hosts") require.Equal(t, 0, result.exitCode, result.stderr) assert.Contains(t, result.stdout, "Wrote report to") @@ -179,7 +179,7 @@ func TestFix_Yes_UpdatesRedirectsAcrossFileTypes(t *testing.T) { 0o644, )) - result := runGone(t, tmpDir, "fix", ".", "--yes", "--types=md,json", "--no-config") + result := runGone(t, tmpDir, "fix", ".", "--yes", "--types=md,json", "--no-config", "--allow-private-hosts") require.Equal(t, 0, result.exitCode, result.stderr) assert.Contains(t, result.stdout, "Found 2 file(s) of type(s): md, json") assert.Contains(t, result.stdout, "Fixed 2 redirect(s) across 2 file(s):") @@ -218,7 +218,7 @@ func TestFix_InteractiveScriptedInput(t *testing.T) { filePath := filepath.Join(tmpDir, "README.md") require.NoError(t, os.WriteFile(filePath, []byte("[docs]("+oldURL+")\n"), 0o644)) - result := runGoneWithInput(t, tmpDir, "?\ny\n", "fix", ".", "--types=md", "--no-config") + result := runGoneWithInput(t, tmpDir, "?\ny\n", "fix", ".", "--types=md", "--no-config", "--allow-private-hosts") require.Equal(t, 0, result.exitCode, result.stderr) assert.Contains(t, result.stdout, "Interactive mode options:") assert.Contains(t, result.stdout, "Fixed 1 redirect(s) in README.md") diff --git a/cmd/interactive.go b/cmd/interactive.go index 6bde1c3..429c576 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -11,10 +11,11 @@ var ( iFileTypes []string iStrictMode bool - iIgnoreDomains []string - iIgnorePatterns []string - iIgnoreRegex []string - iNoConfig bool + iIgnoreDomains []string + iIgnorePatterns []string + iIgnoreRegex []string + iNoConfig bool + iAllowPrivateHosts bool ) // interactiveCmd represents the interactive command. @@ -64,6 +65,11 @@ func init() { "Regex patterns to ignore (can be repeated)") interactiveCmd.Flags().BoolVar(&iNoConfig, "no-config", false, "Skip loading .gonerc.yaml config file") + + // Security options + interactiveCmd.Flags().BoolVar(&iAllowPrivateHosts, "allow-private-hosts", false, + "Allow requests to loopback, private, link-local and reserved IP "+ + "ranges. Default is to block them to prevent SSRF.") } // runInteractive launches the interactive TUI for link checking. diff --git a/cmd/interactive_runner.go b/cmd/interactive_runner.go index f73c09a..419a897 100644 --- a/cmd/interactive_runner.go +++ b/cmd/interactive_runner.go @@ -3,12 +3,13 @@ package cmd import "github.com/leonardomso/gone/internal/checker" type interactiveOptions struct { - FileTypes []string - StrictMode bool - IgnoreDomains []string - IgnorePatterns []string - IgnoreRegex []string - NoConfig bool + FileTypes []string + StrictMode bool + IgnoreDomains []string + IgnorePatterns []string + IgnoreRegex []string + NoConfig bool + AllowPrivateHosts bool } type interactiveRunner struct { @@ -31,12 +32,13 @@ func newInteractiveRunner( func currentInteractiveOptions() interactiveOptions { return interactiveOptions{ - FileTypes: append([]string{}, iFileTypes...), - StrictMode: iStrictMode, - IgnoreDomains: append([]string{}, iIgnoreDomains...), - IgnorePatterns: append([]string{}, iIgnorePatterns...), - IgnoreRegex: append([]string{}, iIgnoreRegex...), - NoConfig: iNoConfig, + FileTypes: append([]string{}, iFileTypes...), + StrictMode: iStrictMode, + IgnoreDomains: append([]string{}, iIgnoreDomains...), + IgnorePatterns: append([]string{}, iIgnorePatterns...), + IgnoreRegex: append([]string{}, iIgnoreRegex...), + NoConfig: iNoConfig, + AllowPrivateHosts: iAllowPrivateHosts, } } @@ -76,6 +78,7 @@ func (r *interactiveRunner) Run(args []string) int { checker.DefaultConcurrency, int(checker.DefaultTimeout.Seconds()), checker.DefaultMaxRetries, + r.opts.AllowPrivateHosts, ), ) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index c415e51..d7bf2b8 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -51,11 +51,16 @@ func newHTTPClient(opts Options) *http.Client { MinVersion: tls.VersionTLS12, }, - // Timeout layers for different phases - tuned for speed - DialContext: (&net.Dialer{ - Timeout: opts.Timeout, - KeepAlive: 30 * time.Second, - }).DialContext, + // Timeout layers for different phases - tuned for speed. + // The base dialer is wrapped by safeDialContext so every resolved + // IP is screened against the SSRF blocklist before connecting. + DialContext: safeDialContext( + (&net.Dialer{ + Timeout: opts.Timeout, + KeepAlive: 30 * time.Second, + }).DialContext, + opts.AllowPrivateHosts, + ), TLSHandshakeTimeout: 5 * time.Second, // Faster TLS handshake timeout ResponseHeaderTimeout: opts.Timeout, ExpectContinueTimeout: 1 * time.Second, @@ -342,6 +347,13 @@ func (c *Checker) followRedirectChain(ctx context.Context, startURL string) ([]R currentURL := startURL for i := 0; i < c.opts.MaxRedirects; i++ { + // Refuse to fetch URLs whose scheme is not http(s) or whose literal + // host IP is in the SSRF blocklist. safeDialContext catches the + // hostname-resolves-to-private-IP case at the TCP layer. + if err := validateURL(currentURL, c.opts.AllowPrivateHosts); err != nil { + return chain, currentURL, 0, fmt.Errorf("blocked redirect: %w", err) + } + statusCode, location, err := c.doRequestGetLocation(ctx, currentURL) if err != nil { return chain, currentURL, 0, err diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index cd958cf..ea3db6e 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -539,7 +539,7 @@ func TestChecker_CheckAll_200OK(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL, FilePath: "test.md", Line: 1}} results := checker.CheckAll(links) @@ -557,7 +557,7 @@ func TestChecker_CheckAll_404NotFound(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -575,7 +575,7 @@ func TestChecker_CheckAll_500ServerError(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -599,7 +599,7 @@ func TestChecker_CheckAll_HeadFallbackToGet(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -622,7 +622,7 @@ func TestChecker_CheckAll_Redirect301(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -647,7 +647,7 @@ func TestChecker_CheckAll_Redirect302(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -676,7 +676,7 @@ func TestChecker_CheckAll_RedirectChain(t *testing.T) { })) defer serverA.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: serverA.URL}} results := checker.CheckAll(links) @@ -697,7 +697,7 @@ func TestChecker_CheckAll_TooManyRedirects(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0).WithMaxRedirects(3)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0).WithMaxRedirects(3)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -720,7 +720,7 @@ func TestChecker_CheckAll_RedirectToDead(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -737,7 +737,7 @@ func TestChecker_CheckAll_403Blocked(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -761,7 +761,7 @@ func TestChecker_CheckAll_403ThenOKWithBrowserHeaders(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -778,7 +778,7 @@ func TestChecker_CheckAll_Duplicates(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{ {URL: server.URL, FilePath: "a.md", Line: 1}, {URL: server.URL, FilePath: "b.md", Line: 5}, // Duplicate @@ -817,7 +817,7 @@ func TestChecker_CheckAll_MultipleDifferentURLs(t *testing.T) { })) defer server2.Close() - checker := New(DefaultOptions().WithConcurrency(2).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(2).WithMaxRetries(0)) links := []Link{ {URL: server1.URL}, {URL: server2.URL}, @@ -850,7 +850,7 @@ func TestChecker_CheckAll_RetryOn5xx(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(3)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(3)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -874,7 +874,7 @@ func TestChecker_CheckAll_RetryOn429(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(2)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(2)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -893,7 +893,7 @@ func TestChecker_CheckAll_NoRetryOn404(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(3)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(3)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -912,7 +912,7 @@ func TestChecker_Check_ContextCanceled(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithTimeout(10 * time.Second)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithTimeout(10 * time.Second)) links := []Link{{URL: server.URL}} ctx, cancel := context.WithCancel(context.Background()) @@ -937,7 +937,7 @@ func TestChecker_Check_ContextCanceled(t *testing.T) { func TestChecker_CheckAll_EmptyLinks(t *testing.T) { t.Parallel() - checker := New(DefaultOptions()) + checker := New(DefaultOptions().WithAllowPrivateHosts(true)) results := checker.CheckAll(nil) assert.Empty(t, results) @@ -953,7 +953,7 @@ func TestChecker_CheckAll_PreservesLinkMetadata(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1)) links := []Link{{ URL: server.URL, FilePath: "README.md", @@ -978,7 +978,7 @@ func TestChecker_CheckAll_Timeout(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithTimeout(100 * time.Millisecond).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithTimeout(100 * time.Millisecond).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -991,7 +991,7 @@ func TestChecker_CheckAll_Timeout(t *testing.T) { func TestChecker_CheckAll_InvalidURL(t *testing.T) { t.Parallel() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: "not-a-valid-url"}} results := checker.CheckAll(links) @@ -1004,7 +1004,7 @@ func TestChecker_CheckAll_ConnectionRefused(t *testing.T) { t.Parallel() // Port that nothing is listening on - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0).WithTimeout(1 * time.Second)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0).WithTimeout(1 * time.Second)) links := []Link{{URL: "http://127.0.0.1:59999"}} results := checker.CheckAll(links) @@ -1043,7 +1043,7 @@ func TestChecker_CheckAll_Concurrency(t *testing.T) { links[i] = Link{URL: server.URL + "/" + string(rune('a'+i))} } - checker := New(DefaultOptions().WithConcurrency(5).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(5).WithMaxRetries(0)) results := checker.CheckAll(links) assert.Len(t, results, 20) @@ -1216,7 +1216,7 @@ func TestChecker_CheckAll_VariousStatusCodes(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -1252,7 +1252,7 @@ func TestChecker_CheckAll_RedirectTo403ThenOKWithBrowserHeaders(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -1278,7 +1278,7 @@ func TestChecker_CheckAll_RedirectTo403StillBlocked(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -1302,7 +1302,7 @@ func TestChecker_CheckAll_501NotImplemented(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -1328,7 +1328,7 @@ func TestChecker_CheckAll_RedirectWithRelativeLocation(t *testing.T) { server := httptest.NewServer(mux) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL + "/start"}} results := checker.CheckAll(links) @@ -1349,7 +1349,7 @@ func TestChecker_CheckAll_RedirectWithInvalidLocation(t *testing.T) { })) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL}} results := checker.CheckAll(links) @@ -1419,7 +1419,7 @@ func TestChecker_CheckAll_RedirectToSameHost(t *testing.T) { server := httptest.NewServer(mux) defer server.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: server.URL + "/a"}} results := checker.CheckAll(links) @@ -1443,7 +1443,7 @@ func TestChecker_CheckAll_307TemporaryRedirect(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) @@ -1466,7 +1466,7 @@ func TestChecker_CheckAll_308PermanentRedirect(t *testing.T) { })) defer redirectServer.Close() - checker := New(DefaultOptions().WithConcurrency(1).WithMaxRetries(0)) + checker := New(DefaultOptions().WithAllowPrivateHosts(true).WithConcurrency(1).WithMaxRetries(0)) links := []Link{{URL: redirectServer.URL}} results := checker.CheckAll(links) diff --git a/internal/checker/options.go b/internal/checker/options.go index febcc5c..589e77f 100644 --- a/internal/checker/options.go +++ b/internal/checker/options.go @@ -45,6 +45,13 @@ type Options struct { // MaxRedirects is the maximum number of redirects to follow. MaxRedirects int + + // AllowPrivateHosts permits requests to loopback, link-local, private, + // and other reserved IP ranges. The zero value (false) is the safe + // default for users who run the tool on untrusted documents: it + // prevents SSRF against cloud metadata services (169.254.169.254), + // internal corporate hosts, and locally bound services. + AllowPrivateHosts bool } // DefaultOptions returns optimized default configuration. @@ -99,6 +106,13 @@ func (o Options) WithUserAgent(ua string) Options { return o } +// WithAllowPrivateHosts sets whether requests to loopback, link-local, +// private, and reserved IP ranges are permitted. Defaults to false. +func (o Options) WithAllowPrivateHosts(v bool) Options { + o.AllowPrivateHosts = v + return o +} + // BrowserUserAgent is a realistic browser User-Agent for bypassing bot detection. const BrowserUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" diff --git a/internal/checker/safety.go b/internal/checker/safety.go new file mode 100644 index 0000000..713ffe3 --- /dev/null +++ b/internal/checker/safety.go @@ -0,0 +1,160 @@ +// Package checker - safety.go contains URL and address validation used to +// prevent server-side request forgery (SSRF). Without these guards an +// attacker who controls a redirect target or who plants a URL in a scanned +// document could coerce the tool into fetching cloud metadata endpoints +// (e.g. 169.254.169.254), localhost services, or internal corporate hosts +// from the user's machine. +package checker + +import ( + "context" + "errors" + "fmt" + "net" + "net/url" + "strings" +) + +// ErrBlockedAddress is returned when a URL or its resolved IP falls inside +// a range that the checker refuses to contact (loopback, link-local, +// private, CGNAT, unspecified, etc.). +var ErrBlockedAddress = errors.New("blocked address") + +// ErrUnsupportedScheme is returned for URLs whose scheme is not http or https. +var ErrUnsupportedScheme = errors.New("unsupported scheme") + +// blockedCIDRs lists the IP ranges considered unsafe to contact during link +// checking. The list is intentionally explicit so that adding or removing a +// range is a code change with an accompanying test. +var blockedCIDRs = compileCIDRs([]string{ + // IPv4 + "0.0.0.0/8", // "this network" + "10.0.0.0/8", // RFC 1918 private + "100.64.0.0/10", // CGNAT (RFC 6598) + "127.0.0.0/8", // loopback + "169.254.0.0/16", // link-local (incl. cloud metadata) + "172.16.0.0/12", // RFC 1918 private + "192.0.0.0/24", // IETF assignments + "192.0.2.0/24", // TEST-NET-1 + "192.168.0.0/16", // RFC 1918 private + "198.18.0.0/15", // benchmark + "198.51.100.0/24", // TEST-NET-2 + "203.0.113.0/24", // TEST-NET-3 + "224.0.0.0/4", // multicast + "240.0.0.0/4", // reserved + "255.255.255.255/32", + + // IPv6 + "::/128", // unspecified + "::1/128", // loopback + "fc00::/7", // unique local + "fe80::/10", // link-local + "ff00::/8", // multicast + "2001:db8::/32", // documentation +}) + +func compileCIDRs(cidrs []string) []*net.IPNet { + out := make([]*net.IPNet, 0, len(cidrs)) + for _, c := range cidrs { + _, n, err := net.ParseCIDR(c) + if err != nil { + // Programmer error - the list above is a constant. + panic(fmt.Sprintf("checker: invalid blocked CIDR %q: %v", c, err)) + } + out = append(out, n) + } + return out +} + +// isBlockedIP reports whether the given IP falls in any of blockedCIDRs. +func isBlockedIP(ip net.IP) bool { + if ip == nil { + return true + } + for _, n := range blockedCIDRs { + if n.Contains(ip) { + return true + } + } + return false +} + +// validateURL parses rawURL and ensures it uses an allowed scheme and that, +// if the host is a literal IP, the IP is not in a blocked range. Hostnames +// that resolve to blocked IPs are caught later by safeDialContext. +func validateURL(rawURL string, allowPrivate bool) error { + u, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("parse url: %w", err) + } + scheme := strings.ToLower(u.Scheme) + if scheme != "http" && scheme != "https" { + return fmt.Errorf("%w: %q", ErrUnsupportedScheme, u.Scheme) + } + if allowPrivate { + return nil + } + host := u.Hostname() + if host == "" { + return fmt.Errorf("%w: empty host", ErrBlockedAddress) + } + if ip := net.ParseIP(host); ip != nil && isBlockedIP(ip) { + return fmt.Errorf("%w: %s", ErrBlockedAddress, ip) + } + return nil +} + +// safeDialContext wraps a base dial function so every resolved IP is +// checked before the connection is established. It catches: +// - hostnames that resolve to blocked IPs +// - DNS rebinding (a host that resolves to a public IP at validate time +// and a blocked IP at dial time) +// +// If allowPrivate is true the wrapper is a transparent pass-through. +func safeDialContext( + base func(ctx context.Context, network, addr string) (net.Conn, error), + allowPrivate bool, +) func(ctx context.Context, network, addr string) (net.Conn, error) { + if allowPrivate { + return base + } + resolver := net.DefaultResolver + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, fmt.Errorf("split host:port: %w", err) + } + // Literal IP fast path. + if ip := net.ParseIP(host); ip != nil { + if isBlockedIP(ip) { + return nil, fmt.Errorf("%w: %s", ErrBlockedAddress, ip) + } + return base(ctx, network, addr) + } + // Resolve and verify every returned address. + ips, err := resolver.LookupIP(ctx, ipNetwork(network), host) + if err != nil { + return nil, err + } + for _, ip := range ips { + if isBlockedIP(ip) { + return nil, fmt.Errorf("%w: %s -> %s", ErrBlockedAddress, host, ip) + } + } + // Re-dial using the verified address. We pick the first allowed IP + // to avoid the resolver returning a different (possibly blocked) + // address between LookupIP and Dial. + return base(ctx, network, net.JoinHostPort(ips[0].String(), port)) + } +} + +func ipNetwork(network string) string { + switch network { + case "tcp4", "udp4", "ip4": + return "ip4" + case "tcp6", "udp6", "ip6": + return "ip6" + default: + return "ip" + } +} diff --git a/internal/checker/safety_test.go b/internal/checker/safety_test.go new file mode 100644 index 0000000..37207cb --- /dev/null +++ b/internal/checker/safety_test.go @@ -0,0 +1,322 @@ +package checker + +import ( + "context" + "errors" + "net" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ============================================================================= +// isBlockedIP / validateURL +// ============================================================================= + +func TestIsBlockedIP_TableDriven(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + ip string + blocked bool + }{ + // IPv4 reserved ranges that MUST be blocked. + {"loopback 127.0.0.1", "127.0.0.1", true}, + {"loopback 127.0.0.53", "127.0.0.53", true}, + {"link-local IMDS", "169.254.169.254", true}, + {"link-local generic", "169.254.1.1", true}, + {"RFC1918 10/8", "10.0.0.1", true}, + {"RFC1918 172.16/12 low", "172.16.0.1", true}, + {"RFC1918 172.16/12 high", "172.31.255.254", true}, + {"RFC1918 192.168/16", "192.168.1.1", true}, + {"CGNAT", "100.64.0.1", true}, + {"unspecified 0.0.0.0", "0.0.0.0", true}, + {"multicast", "224.0.0.1", true}, + {"broadcast 255.255.255.255", "255.255.255.255", true}, + {"TEST-NET-1", "192.0.2.1", true}, + + // IPv6 reserved ranges. + {"IPv6 loopback ::1", "::1", true}, + {"IPv6 unspecified ::", "::", true}, + {"IPv6 link-local fe80::", "fe80::1", true}, + {"IPv6 ULA fc00::", "fc00::1", true}, + {"IPv6 ULA fd00::", "fd00::1", true}, + + // Public addresses must NOT be blocked. + {"public IPv4 1.1.1.1", "1.1.1.1", false}, + {"public IPv4 8.8.8.8", "8.8.8.8", false}, + {"public IPv4 just above 172/12", "172.32.0.1", false}, + {"public IPv4 just below 10/8", "9.255.255.255", false}, + {"public IPv6 2001:4860::8888", "2001:4860::8888", false}, + + // Edge boundary case: 172.15.255.255 is OUTSIDE 172.16/12. + {"public boundary 172.15.255.255", "172.15.255.255", false}, + + // nil-ish (parses to nil) must be treated as blocked. + {"empty string", "", true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ip := net.ParseIP(tc.ip) + assert.Equal(t, tc.blocked, isBlockedIP(ip), + "isBlockedIP(%q) = %v, want %v", tc.ip, isBlockedIP(ip), tc.blocked) + }) + } +} + +func TestValidateURL_RejectsBlockedSchemes(t *testing.T) { + t.Parallel() + + cases := []string{ + "file:///etc/passwd", + "ftp://example.com/x", + "gopher://example.com/", + "javascript:alert(1)", + "data:text/html,