diff --git a/go.mod b/go.mod index c9609f0..e70eb90 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,6 @@ require ( github.com/mattn/go-sqlite3 v1.14.44 github.com/pressly/goose/v3 v3.27.1 github.com/stretchr/testify v1.11.1 - golang.org/x/term v0.43.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index dd4b309..c05c435 100644 --- a/go.sum +++ b/go.sum @@ -68,8 +68,6 @@ golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= -golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/adapters/providers/amazon/provider.go b/internal/adapters/providers/amazon/provider.go index 4d9f09d..68198f0 100644 --- a/internal/adapters/providers/amazon/provider.go +++ b/internal/adapters/providers/amazon/provider.go @@ -17,6 +17,7 @@ import ( "os/exec" "regexp" "strconv" + "strings" "time" "github.com/eshaffer321/monarchmoney-sync-backend/internal/adapters/providers" @@ -25,6 +26,21 @@ import ( // validProfilePattern matches alphanumeric, dash, and underscore characters only var validProfilePattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`) +// allowedCLIArgsPattern limits arguments to the scraper flags and scalar values +// produced by buildCLIArgs. +var allowedCLIArgsPattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`) + +const ( + amazonScraperCommand = "amazon-scraper" + npxCommand = "npx" + scraperPackageName = "amazon-order-scraper" +) + +type cliCommand struct { + name string + useNpx bool +} + // isValidProfile checks if a profile name is safe to pass to the CLI func isValidProfile(profile string) bool { if profile == "" { @@ -188,18 +204,26 @@ func (p *Provider) buildCLIArgs(opts providers.FetchOptions) []string { // executeCLI executes the amazon-order-scraper CLI and returns the output func (p *Provider) executeCLI(ctx context.Context, args []string) ([]byte, error) { // Try to find the CLI - cliPath, useNpx := p.findCLI() + cli, err := p.findCLI() + if err != nil { + return nil, err + } + if err := validateCLIArgs(args); err != nil { + return nil, err + } var cmd *exec.Cmd - if useNpx { + // Keep executable names literal for static analysis; args are validated before assignment. + if cli.useNpx { // Use npx to run the package - npxArgs := append([]string{"amazon-order-scraper"}, args...) - cmd = exec.CommandContext(ctx, cliPath, npxArgs...) - p.logger.Debug("executing CLI via npx", slog.String("args", fmt.Sprintf("%v", npxArgs))) + cmd = exec.CommandContext(ctx, "npx") + cmd.Args = append([]string{npxCommand, scraperPackageName}, args...) + p.logger.Debug("executing CLI via npx", slog.String("args", fmt.Sprintf("%v", cmd.Args[1:]))) } else { // Direct execution - cmd = exec.CommandContext(ctx, cliPath, args...) - p.logger.Debug("executing CLI directly", slog.String("path", cliPath), slog.String("args", fmt.Sprintf("%v", args))) + cmd = exec.CommandContext(ctx, "amazon-scraper") + cmd.Args = append([]string{amazonScraperCommand}, args...) + p.logger.Debug("executing CLI directly", slog.String("command", cli.name), slog.String("args", fmt.Sprintf("%v", args))) } var stdout, stderr bytes.Buffer @@ -209,7 +233,7 @@ func (p *Provider) executeCLI(ctx context.Context, args []string) ([]byte, error cmd.Env = append(os.Environ(), "BROWSER_DATA_DIR="+p.browserDataDir) } - err := cmd.Run() + err = cmd.Run() if err != nil { // Check exit code for specific errors if exitErr, ok := err.(*exec.ExitError); ok { @@ -240,19 +264,35 @@ func (p *Provider) loginCommand() string { // findCLI locates the amazon-order-scraper CLI // Returns the path and whether to use npx -func (p *Provider) findCLI() (string, bool) { +func (p *Provider) findCLI() (cliCommand, error) { // First, try to find globally installed CLI - if path, err := exec.LookPath("amazon-scraper"); err == nil { - return path, false + if _, err := exec.LookPath(amazonScraperCommand); err == nil { + return cliCommand{name: amazonScraperCommand}, nil } // Fall back to npx - if path, err := exec.LookPath("npx"); err == nil { - return path, true + if _, err := exec.LookPath(npxCommand); err == nil { + return cliCommand{name: npxCommand, useNpx: true}, nil } - // Last resort: assume npx is available - return "npx", true + return cliCommand{}, fmt.Errorf("amazon-order-scraper CLI not available: install %q or %q", amazonScraperCommand, npxCommand) +} + +func validateCLIArgs(args []string) error { + for _, arg := range args { + switch arg { + case "--since", "--until", "--days", "--profile", "--headless", "--stdout": + continue + } + if strings.HasPrefix(arg, "--") { + return fmt.Errorf("unsupported amazon CLI flag: %q", arg) + } + if !allowedCLIArgsPattern.MatchString(arg) { + return fmt.Errorf("unsafe amazon CLI argument: %q", arg) + } + } + + return nil } // GetOrderDetails fetches details for a specific order @@ -285,13 +325,16 @@ func (p *Provider) GetRateLimit() time.Duration { // HealthCheck verifies the provider can connect and authenticate func (p *Provider) HealthCheck(ctx context.Context) error { // Try to find the CLI - cliPath, useNpx := p.findCLI() + cli, err := p.findCLI() + if err != nil { + return err + } var cmd *exec.Cmd - if useNpx { - cmd = exec.CommandContext(ctx, cliPath, "amazon-order-scraper", "--help") + if cli.useNpx { + cmd = exec.CommandContext(ctx, npxCommand, scraperPackageName, "--help") } else { - cmd = exec.CommandContext(ctx, cliPath, "--help") + cmd = exec.CommandContext(ctx, amazonScraperCommand, "--help") } if err := cmd.Run(); err != nil { diff --git a/internal/adapters/providers/amazon/provider_test.go b/internal/adapters/providers/amazon/provider_test.go index cae3e13..0b16901 100644 --- a/internal/adapters/providers/amazon/provider_test.go +++ b/internal/adapters/providers/amazon/provider_test.go @@ -1,11 +1,16 @@ package amazon import ( + "context" + "os" + "path/filepath" + "strings" "testing" "time" "github.com/eshaffer321/monarchmoney-sync-backend/internal/adapters/providers" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // TestProvider_ImplementsInterface verifies the provider implements the interface @@ -59,6 +64,11 @@ func TestProvider_WithConfig(t *testing.T) { assert.Equal(t, "/tmp/itemize-amazon", provider.browserDataDir) } +func TestNewProvider_RejectsUnsafeProfile(t *testing.T) { + provider := NewProvider(nil, &ProviderConfig{Profile: "wife;rm-rf"}) + assert.Empty(t, provider.profile) +} + // TestProvider_BuildCLIArgs tests CLI argument building func TestProvider_BuildCLIArgs(t *testing.T) { tests := []struct { @@ -104,6 +114,157 @@ func TestProvider_BuildCLIArgs(t *testing.T) { } } +func TestValidateCLIArgs(t *testing.T) { + assert.NoError(t, validateCLIArgs([]string{ + "--since", "2024-11-01", + "--until", "2024-11-30", + "--days", "14", + "--profile", "wife", + "--headless", + "--stdout", + })) + assert.Error(t, validateCLIArgs([]string{"--stdout", ";rm"})) + assert.Error(t, validateCLIArgs([]string{"--stdout", "--eval"})) +} + +func TestExecuteCLIUsesDirectScraperWithValidatedArgsAndBrowserDir(t *testing.T) { + binDir := t.TempDir() + writeExecutable(t, binDir, amazonScraperCommand, `#!/bin/sh +printf 'args=%s\n' "$*" +printf 'browser=%s\n' "$BROWSER_DATA_DIR" +`) + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, &ProviderConfig{BrowserDataDir: "/tmp/amazon-browser"}) + output, err := provider.executeCLI(context.Background(), []string{"--days", "14", "--stdout"}) + + require.NoError(t, err) + assert.Contains(t, string(output), "args=--days 14 --stdout") + assert.Contains(t, string(output), "browser=/tmp/amazon-browser") +} + +func TestExecuteCLIFallsBackToNpx(t *testing.T) { + binDir := t.TempDir() + writeExecutable(t, binDir, npxCommand, `#!/bin/sh +printf 'args=%s\n' "$*" +`) + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, nil) + output, err := provider.executeCLI(context.Background(), []string{"--stdout"}) + + require.NoError(t, err) + assert.Contains(t, string(output), "args=amazon-order-scraper --stdout") +} + +func TestExecuteCLIReturnsValidationErrorBeforeRunningCommand(t *testing.T) { + binDir := t.TempDir() + markerPath := filepath.Join(binDir, "ran") + writeExecutable(t, binDir, amazonScraperCommand, "#!/bin/sh\ntouch "+markerPath+"\n") + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, nil) + output, err := provider.executeCLI(context.Background(), []string{"--stdout", "--eval"}) + + require.Error(t, err) + assert.Nil(t, output) + assert.Contains(t, err.Error(), "unsupported amazon CLI flag") + _, statErr := os.Stat(markerPath) + assert.True(t, os.IsNotExist(statErr)) +} + +func TestExecuteCLILoginRequiredIncludesProfileAndBrowserDir(t *testing.T) { + binDir := t.TempDir() + writeExecutable(t, binDir, amazonScraperCommand, "#!/bin/sh\nexit 2\n") + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, &ProviderConfig{ + Profile: "wife", + BrowserDataDir: "/tmp/amazon-browser", + }) + output, err := provider.executeCLI(context.Background(), []string{"--stdout"}) + + require.Error(t, err) + assert.Nil(t, output) + assert.Contains(t, err.Error(), "amazon login required") + assert.Contains(t, err.Error(), `BROWSER_DATA_DIR="/tmp/amazon-browser"`) + assert.Contains(t, err.Error(), "--profile wife") +} + +func TestFindCLIReportsMissingExecutable(t *testing.T) { + t.Setenv("PATH", t.TempDir()) + + provider := NewProvider(nil, nil) + cli, err := provider.findCLI() + + require.Error(t, err) + assert.Empty(t, cli.name) + assert.Contains(t, err.Error(), "not available") +} + +func TestHealthCheckUsesAvailableCLI(t *testing.T) { + tests := []struct { + name string + executableName string + wantArg string + }{ + { + name: "direct scraper", + executableName: amazonScraperCommand, + wantArg: "--help", + }, + { + name: "npx fallback", + executableName: npxCommand, + wantArg: "amazon-order-scraper --help", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + binDir := t.TempDir() + argsPath := filepath.Join(binDir, "args.txt") + writeExecutable(t, binDir, tt.executableName, `#!/bin/sh +printf '%s' "$*" > "`+argsPath+`" +`) + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, nil) + require.NoError(t, provider.HealthCheck(context.Background())) + + data, err := os.ReadFile(argsPath) + require.NoError(t, err) + assert.Equal(t, tt.wantArg, string(data)) + }) + } +} + +func TestHealthCheckReturnsCommandError(t *testing.T) { + binDir := t.TempDir() + writeExecutable(t, binDir, amazonScraperCommand, "#!/bin/sh\nexit 1\n") + t.Setenv("PATH", binDir) + + provider := NewProvider(nil, nil) + err := provider.HealthCheck(context.Background()) + + require.Error(t, err) + assert.Contains(t, err.Error(), "CLI not available") +} + +func TestLoginCommandDefaultsToDirectScraper(t *testing.T) { + provider := NewProvider(nil, &ProviderConfig{Profile: "wife"}) + + assert.Equal(t, "run 'amazon-scraper --login --profile wife' to authenticate", provider.loginCommand()) +} + +func writeExecutable(t *testing.T, dir, name, content string) { + t.Helper() + + path := filepath.Join(dir, name) + content = strings.ReplaceAll(content, "\r\n", "\n") + require.NoError(t, os.WriteFile(path, []byte(content), 0700)) +} + // TestOrder_Interface verifies Order implements providers.Order func TestOrder_Interface(t *testing.T) { var _ providers.Order = (*Order)(nil) diff --git a/internal/infrastructure/config/config.go b/internal/infrastructure/config/config.go index f9348c7..d843074 100644 --- a/internal/infrastructure/config/config.go +++ b/internal/infrastructure/config/config.go @@ -14,6 +14,8 @@ package config import ( "fmt" "os" + "path/filepath" + "strings" "gopkg.in/yaml.v3" ) @@ -111,7 +113,20 @@ type LoggingConfig struct { // Load reads and parses the config file func Load(path string) (*Config, error) { - data, err := os.ReadFile(path) + rootDir, configPath, err := validateConfigPath(path) + if err != nil { + return nil, err + } + + root, err := os.OpenRoot(rootDir) + if err != nil { + return nil, err + } + defer func() { + _ = root.Close() + }() + + data, err := root.ReadFile(configPath) if err != nil { return nil, err } @@ -127,6 +142,39 @@ func Load(path string) (*Config, error) { return &cfg, nil } +func validateConfigPath(path string) (string, string, error) { + if strings.TrimSpace(path) == "" { + return "", "", fmt.Errorf("config path cannot be empty") + } + + cleanPath := filepath.Clean(path) + ext := strings.ToLower(filepath.Ext(cleanPath)) + if ext != ".yaml" && ext != ".yml" { + return "", "", fmt.Errorf("config path must point to a YAML file") + } + + rootDir := "." + rootPath := cleanPath + if !filepath.IsAbs(cleanPath) { + if strings.HasPrefix(cleanPath, ".."+string(filepath.Separator)) || cleanPath == ".." { + return "", "", fmt.Errorf("relative config path must stay within the working directory") + } + } else { + rootDir = filepath.Dir(cleanPath) + rootPath = filepath.Base(cleanPath) + } + + info, err := os.Stat(cleanPath) + if err != nil { + return "", "", err + } + if info.IsDir() { + return "", "", fmt.Errorf("config path must be a file") + } + + return rootDir, rootPath, nil +} + // LoadFromEnv loads configuration from environment variables only func LoadFromEnv() *Config { return &Config{ diff --git a/internal/infrastructure/config/config_test.go b/internal/infrastructure/config/config_test.go index 2e125e9..962e1ad 100644 --- a/internal/infrastructure/config/config_test.go +++ b/internal/infrastructure/config/config_test.go @@ -99,7 +99,7 @@ monarch: api_key: "${TEST_MONARCH_TOKEN}" ` - err := os.WriteFile(configPath, []byte(configContent), 0644) + err := os.WriteFile(configPath, []byte(configContent), 0600) require.NoError(t, err) // Set env vars @@ -115,3 +115,59 @@ monarch: assert.Equal(t, "expanded.db", cfg.Storage.DatabasePath) assert.Equal(t, "expanded-token", cfg.Monarch.APIKey) } + +func TestValidateConfigPathRejectsUnsafePaths(t *testing.T) { + tests := []string{ + "", + "../config.yaml", + "config.json", + } + + for _, path := range tests { + t.Run(path, func(t *testing.T) { + _, _, err := validateConfigPath(path) + require.Error(t, err) + }) + } +} + +func TestValidateConfigPathAcceptsRelativeYAMLWithinWorkingDirectory(t *testing.T) { + tmpDir := t.TempDir() + t.Chdir(tmpDir) + require.NoError(t, os.Mkdir("config", 0700)) + require.NoError(t, os.WriteFile(filepath.Join("config", "local.yml"), []byte("storage: {}\n"), 0600)) + + rootDir, rootPath, err := validateConfigPath(filepath.Join("config", "local.yml")) + + require.NoError(t, err) + assert.Equal(t, ".", rootDir) + assert.Equal(t, filepath.Join("config", "local.yml"), rootPath) +} + +func TestValidateConfigPathAcceptsAbsoluteYAML(t *testing.T) { + configPath := filepath.Join(t.TempDir(), "config.yaml") + require.NoError(t, os.WriteFile(configPath, []byte("storage: {}\n"), 0600)) + + rootDir, rootPath, err := validateConfigPath(configPath) + + require.NoError(t, err) + assert.Equal(t, filepath.Dir(configPath), rootDir) + assert.Equal(t, filepath.Base(configPath), rootPath) +} + +func TestValidateConfigPathRejectsMissingFileAndDirectory(t *testing.T) { + tmpDir := t.TempDir() + + _, _, err := validateConfigPath(filepath.Join(tmpDir, "missing.yaml")) + require.Error(t, err) + + _, _, err = validateConfigPath(tmpDir + string(filepath.Separator) + "config.yaml" + string(filepath.Separator)) + require.Error(t, err) + + dirPath := filepath.Join(tmpDir, "directory.yaml") + require.NoError(t, os.Mkdir(dirPath, 0700)) + + _, _, err = validateConfigPath(dirPath) + require.Error(t, err) + assert.Contains(t, err.Error(), "must be a file") +} diff --git a/internal/infrastructure/logging/logger.go b/internal/infrastructure/logging/logger.go index 7667149..ac1b240 100644 --- a/internal/infrastructure/logging/logger.go +++ b/internal/infrastructure/logging/logger.go @@ -37,7 +37,7 @@ func NewLogger(cfg config.LoggingConfig) *slog.Logger { var writer io.Writer = os.Stdout if cfg.FilePath != "" { - file, err := os.OpenFile(cfg.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + file, err := os.OpenFile(cfg.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600) if err != nil { // Fall back to stdout only, log warning to stderr _, _ = os.Stderr.WriteString("Warning: could not open log file " + cfg.FilePath + ": " + err.Error() + "\n") diff --git a/internal/infrastructure/logging/logger_test.go b/internal/infrastructure/logging/logger_test.go new file mode 100644 index 0000000..147ab47 --- /dev/null +++ b/internal/infrastructure/logging/logger_test.go @@ -0,0 +1,92 @@ +package logging + +import ( + "bytes" + "context" + "io" + "log/slog" + "os" + "path/filepath" + "runtime" + "testing" + "time" + + "github.com/eshaffer321/monarchmoney-sync-backend/internal/infrastructure/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewLoggerCreatesPrivateLogFile(t *testing.T) { + logPath := filepath.Join(t.TempDir(), "itemize.log") + + logger := NewLogger(config.LoggingConfig{ + Level: "debug", + FilePath: logPath, + }) + t.Cleanup(CloseLogFile) + + logger.Debug("private file check") + + info, err := os.Stat(logPath) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0600), info.Mode().Perm()) +} + +func TestIsTerminal(t *testing.T) { + assert.False(t, isTerminal(bytes.NewBuffer(nil))) + + file, err := os.CreateTemp(t.TempDir(), "not-a-terminal") + require.NoError(t, err) + defer file.Close() + + assert.False(t, isTerminal(file)) +} + +func TestMavenHandlerFormattingBranches(t *testing.T) { + var buf bytes.Buffer + handler := NewMavenHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}) + handler.useColors = true + + withAttrs := handler.WithAttrs([]slog.Attr{ + slog.String("system", "sync"), + slog.String("request_id", "abc123"), + }) + record := slog.NewRecord(time.Date(2026, 6, 20, 12, 34, 56, 0, time.UTC), slog.LevelWarn, "hello", 0) + record.AddAttrs(slog.String("system", "ignored"), slog.Int("count", 2)) + + require.NoError(t, withAttrs.Handle(context.Background(), record)) + output := buf.String() + assert.Contains(t, output, "[WARN]") + assert.Contains(t, output, "[sync]") + assert.Contains(t, output, "request_id=abc123") + assert.Contains(t, output, "count=2") + assert.NotContains(t, output, "system=ignored") +} + +func TestNewLoggerFallsBackWhenLogFileCannotOpen(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("directory write modes are not portable on Windows") + } + + dir := t.TempDir() + blockedPath := filepath.Join(dir, "blocked", "itemize.log") + logger := NewLogger(config.LoggingConfig{FilePath: blockedPath}) + require.NotNil(t, logger) + + CloseLogFile() + _, err := os.Stat(blockedPath) + assert.True(t, os.IsNotExist(err)) +} + +func TestMavenHandlerWriteError(t *testing.T) { + handler := NewMavenHandler(errorWriter{}, nil) + record := slog.NewRecord(time.Now(), slog.LevelInfo, "boom", 0) + + require.Error(t, handler.Handle(context.Background(), record)) +} + +type errorWriter struct{} + +func (errorWriter) Write(_ []byte) (int, error) { + return 0, io.ErrClosedPipe +} diff --git a/internal/infrastructure/logging/maven_handler.go b/internal/infrastructure/logging/maven_handler.go index 6a35ed5..df46d9f 100644 --- a/internal/infrastructure/logging/maven_handler.go +++ b/internal/infrastructure/logging/maven_handler.go @@ -8,8 +8,6 @@ import ( "os" "strings" "sync" - - "golang.org/x/term" ) // ANSI color codes @@ -58,7 +56,11 @@ func NewMavenHandler(w io.Writer, opts *slog.HandlerOptions) *MavenHandler { // isTerminal checks if the writer is a terminal (for color output) func isTerminal(w io.Writer) bool { if f, ok := w.(*os.File); ok { - return term.IsTerminal(int(f.Fd())) + info, err := f.Stat() + if err != nil { + return false + } + return info.Mode()&os.ModeCharDevice != 0 } return false }