diff --git a/internal/adapters/providers/amazon/order.go b/internal/adapters/providers/amazon/order.go index 4094b50..fd3cb58 100644 --- a/internal/adapters/providers/amazon/order.go +++ b/internal/adapters/providers/amazon/order.go @@ -3,6 +3,7 @@ package amazon import ( "errors" "log/slog" + "math" "time" "github.com/eshaffer321/itemize/internal/adapters/providers" @@ -175,6 +176,58 @@ func (o *Order) GetNonBankAmount() (float64, error) { return nonBankTotal, nil } +// GetItemsForCharge returns the items belonging to the shipment that best +// matches the given bank charge amount. When shipment data is available this +// lets each Monarch transaction be split using only the items that were +// actually in that box rather than pro-rating the entire order. +// +// Matching works by estimating each shipment's charge as: +// +// shipmentSubtotal * (1 + taxRate) +// +// where taxRate = order.Tax / order.Subtotal. The shipment whose estimate is +// closest to chargeAmount wins. Falls back to all order items when no +// shipment data is present or when the order has only one shipment. 
+func (o *Order) GetItemsForCharge(chargeAmount float64) []providers.OrderItem { + if len(o.parsedOrder.Shipments) <= 1 { + return o.items + } + + taxRate := 0.0 + if o.parsedOrder.Subtotal > 0 { + taxRate = o.parsedOrder.Tax / o.parsedOrder.Subtotal + } + + bestIdx := -1 + bestDiff := math.MaxFloat64 + for i, shipment := range o.parsedOrder.Shipments { + var subtotal float64 + for _, item := range shipment.Items { + subtotal += item.Price * float64(item.Quantity) + } + estimated := subtotal * (1 + taxRate) + diff := math.Abs(estimated - chargeAmount) + if diff < bestDiff { + bestDiff = diff + bestIdx = i + } + } + + if bestIdx < 0 { + return o.items + } + + shipment := o.parsedOrder.Shipments[bestIdx] + items := make([]providers.OrderItem, 0, len(shipment.Items)) + for _, item := range shipment.Items { + items = append(items, &OrderItem{parsedItem: item}) + } + if len(items) == 0 { + return o.items + } + return items +} + // IsMultiDelivery checks if order was split into multiple shipments/charges // Returns true if there are multiple final charges func (o *Order) IsMultiDelivery() (bool, error) { diff --git a/internal/adapters/providers/amazon/parser.go b/internal/adapters/providers/amazon/parser.go index e35e663..69d3cd2 100644 --- a/internal/adapters/providers/amazon/parser.go +++ b/internal/adapters/providers/amazon/parser.go @@ -80,6 +80,15 @@ func ConvertCLIOrder(cliOrder CLIOrder) (*ParsedOrder, error) { order.Items = append(order.Items, item) } + // Parse shipments + for i, cliShipment := range cliOrder.Shipments { + shipment, err := convertCLIShipment(cliShipment) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment %d: %w", i, err) + } + order.Shipments = append(order.Shipments, shipment) + } + // Parse transactions - fail on any transaction parse error for i, cliTx := range cliOrder.Transactions { tx, err := convertCLITransaction(cliTx) @@ -92,6 +101,31 @@ func ConvertCLIOrder(cliOrder CLIOrder) (*ParsedOrder, error) { return order, nil 
} +// convertCLIShipment converts a CLIShipment to a ParsedShipment +func convertCLIShipment(cliShipment CLIShipment) (*ParsedShipment, error) { + shipment := &ParsedShipment{ + Status: cliShipment.Status, + } + + if cliShipment.Date != "" { + date, err := parseDate(cliShipment.Date) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment date %q: %w", cliShipment.Date, err) + } + shipment.Date = date + } + + for i, cliItem := range cliShipment.Items { + item, err := convertCLIItem(cliItem) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment item %d (%q): %w", i, cliItem.Name, err) + } + shipment.Items = append(shipment.Items, item) + } + + return shipment, nil +} + // convertCLIItem converts a CLIOrderItem to a ParsedOrderItem func convertCLIItem(cliItem CLIOrderItem) (*ParsedOrderItem, error) { price, err := parseAmount(cliItem.Price) diff --git a/internal/adapters/providers/amazon/provider.go b/internal/adapters/providers/amazon/provider.go index eaedf25..3ca6cf8 100644 --- a/internal/adapters/providers/amazon/provider.go +++ b/internal/adapters/providers/amazon/provider.go @@ -12,6 +12,7 @@ import ( "bytes" "context" "fmt" + "io" "log/slog" "os" "os/exec" @@ -228,7 +229,7 @@ func (p *Provider) executeCLI(ctx context.Context, args []string) ([]byte, error var stdout, stderr bytes.Buffer cmd.Stdout = &stdout - cmd.Stderr = &stderr + cmd.Stderr = io.MultiWriter(&stderr, os.Stderr) // stream logs to terminal in real-time if p.browserDataDir != "" { cmd.Env = append(os.Environ(), "BROWSER_DATA_DIR="+p.browserDataDir) } diff --git a/internal/adapters/providers/amazon/types.go b/internal/adapters/providers/amazon/types.go index c93537e..ee10dce 100644 --- a/internal/adapters/providers/amazon/types.go +++ b/internal/adapters/providers/amazon/types.go @@ -7,6 +7,13 @@ type CLIOutput struct { Orders []CLIOrder `json:"orders"` } +// CLIShipment represents a shipment group from the CLI output +type CLIShipment struct { + Status string 
`json:"status"` // "Delivered", "Arriving", "Shipped" + Date string `json:"date"` // ISO 8601: "2025-12-15" + Items []CLIOrderItem `json:"items"` +} + // CLIOrder represents an order from the CLI output type CLIOrder struct { OrderID string `json:"orderId"` @@ -16,6 +23,7 @@ type CLIOrder struct { Tax string `json:"tax"` // "$6.58" Shipping string `json:"shipping"` // "$0.00" Items []CLIOrderItem `json:"items"` + Shipments []CLIShipment `json:"shipments"` Transactions []CLITransaction `json:"transactions"` } @@ -35,6 +43,13 @@ type CLITransaction struct { Description string `json:"description"` // "Prime Visa ****1211..." } +// ParsedShipment is the internal representation of a shipment group +type ParsedShipment struct { + Status string + Date time.Time + Items []*ParsedOrderItem +} + // ParsedOrder is the internal representation after parsing CLI output type ParsedOrder struct { ID string @@ -44,6 +59,7 @@ type ParsedOrder struct { Tax float64 Shipping float64 Items []*ParsedOrderItem + Shipments []*ParsedShipment Transactions []*ParsedTransaction } diff --git a/internal/application/sync/handlers/amazon.go b/internal/application/sync/handlers/amazon.go index 9d038a0..b3697fe 100644 --- a/internal/application/sync/handlers/amazon.go +++ b/internal/application/sync/handlers/amazon.go @@ -25,6 +25,10 @@ type AmazonOrder interface { GetFinalCharges() ([]float64, error) GetNonBankAmount() (float64, error) IsMultiDelivery() (bool, error) + // GetItemsForCharge returns only the items that belong to the shipment + // matching the given charge amount. Falls back to all items when shipment + // data is unavailable or the order has a single shipment. 
+ GetItemsForCharge(chargeAmount float64) []providers.OrderItem } // TransactionConsolidator consolidates multiple transactions into one @@ -174,111 +178,150 @@ func (h *AmazonHandler) ProcessOrder( // Step 3: Validate charges validation := validator.ValidateCharges(bankCharges, order.GetTotal(), nonBankAmount) - if !validation.Valid { - h.logWarn("Charge validation failed", - "order_id", order.GetID(), - "reason", validation.Reason, - "bank_sum", validation.BankChargesSum, - "expected", validation.ExpectedSum, - "difference", validation.Difference) - result.Skipped = true - result.SkipReason = validation.Reason - return result, nil - } - - h.logDebug("Charge validation passed", - "order_id", order.GetID(), - "bank_sum", validation.BankChargesSum, - "expected", validation.ExpectedSum) // Step 4: Match to Monarch transactions var matchedTxns []*monarch.Transaction var consolidatedTxn *monarch.Transaction + monarchDiscovered := false // true when we matched via subset search rather than scraper charges - if len(bankCharges) > 1 { - // Multi-delivery order - find multiple matches - multiResult, err := h.matcher.FindMultipleMatches(order, monarchTxns, usedTxnIDs, bankCharges) - if err != nil { - return nil, fmt.Errorf("multi-match error: %w", err) - } + if !validation.Valid { + // Scraper charges are incomplete (common for multi-shipment orders where later + // charges post after the scraper visited the order details page). Try to find the + // matching Monarch transactions by searching for a subset that sums to the order total. 
+ h.logDebug("Scraper charges incomplete, attempting Monarch-side discovery", + "order_id", order.GetID(), + "scraper_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum) - if !multiResult.AllFound { - result.Skipped = true - result.SkipReason = fmt.Sprintf("could not find all transactions: expected %d, found %d", - len(bankCharges), len(multiResult.Matches)) - h.logWarn("Not all transactions found", + discovered, discoverErr := h.matcher.FindSubsetByTotal(order, monarchTxns, usedTxnIDs) + if discoverErr != nil { + h.logWarn("Charge validation failed and Monarch discovery found no match", "order_id", order.GetID(), - "expected", len(bankCharges), - "found", len(multiResult.Matches)) + "reason", validation.Reason, + "bank_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum, + "difference", validation.Difference) + result.Skipped = true + result.SkipReason = validation.Reason return result, nil } - // Extract matched transactions - for _, match := range multiResult.Matches { - matchedTxns = append(matchedTxns, match.Transaction) - usedTxnIDs[match.Transaction.ID] = true + matchedTxns = discovered + monarchDiscovered = true + for _, t := range matchedTxns { + usedTxnIDs[t.ID] = true } - - h.logInfo("Matched all transactions for multi-delivery order", + h.logInfo("Monarch-side discovery found matching transactions", "order_id", order.GetID(), - "transaction_count", len(matchedTxns)) + "count", len(matchedTxns)) + } else { + h.logDebug("Charge validation passed", + "order_id", order.GetID(), + "bank_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum) - // Step 5: Consolidate transactions - consolidationResult, err := h.consolidator.ConsolidateTransactions(ctx, matchedTxns, order, dryRun) - if err != nil { - return nil, fmt.Errorf("consolidation error: %w", err) - } - consolidatedTxn = consolidationResult.ConsolidatedTransaction + if len(bankCharges) > 1 { + // Multi-delivery order - find multiple matches + 
multiResult, err := h.matcher.FindMultipleMatches(order, monarchTxns, usedTxnIDs, bankCharges) + if err != nil { + return nil, fmt.Errorf("multi-match error: %w", err) + } - h.logInfo("Consolidated transactions", - "order_id", order.GetID(), - "consolidated_id", consolidatedTxn.ID, - "original_count", len(matchedTxns)) - } else { - // Single charge - find one match - // Use a wrapper order that returns the bank charge amount for matching - // This handles gift card orders where order total differs from bank charge - matchOrder := &bankChargeOrder{ - Order: order, - bankCharge: bankCharges[0], - } + if !multiResult.AllFound { + result.Skipped = true + result.SkipReason = fmt.Sprintf("could not find all transactions: expected %d, found %d", + len(bankCharges), len(multiResult.Matches)) + h.logWarn("Not all transactions found", + "order_id", order.GetID(), + "expected", len(bankCharges), + "found", len(multiResult.Matches)) + return result, nil + } - matchResult, err := h.matcher.FindMatch(matchOrder, monarchTxns, usedTxnIDs) - if err != nil { - return nil, fmt.Errorf("match error: %w", err) - } + for _, match := range multiResult.Matches { + matchedTxns = append(matchedTxns, match.Transaction) + usedTxnIDs[match.Transaction.ID] = true + } + h.logInfo("Matched all transactions for multi-delivery order", + "order_id", order.GetID(), + "transaction_count", len(matchedTxns)) + } else { + // Single charge - find one match + // Use a wrapper order that returns the bank charge amount for matching + // This handles gift card orders where order total differs from bank charge + matchOrder := &bankChargeOrder{ + Order: order, + bankCharge: bankCharges[0], + } - if matchResult == nil { - result.Skipped = true - result.SkipReason = "no matching transaction found" - h.logWarn("No matching transaction found", + matchResult, err := h.matcher.FindMatch(matchOrder, monarchTxns, usedTxnIDs) + if err != nil { + return nil, fmt.Errorf("match error: %w", err) + } + + if matchResult == nil 
{ + result.Skipped = true + result.SkipReason = "no matching transaction found" + h.logWarn("No matching transaction found", + "order_id", order.GetID(), + "expected_amount", bankCharges[0]) + return result, nil + } + + consolidatedTxn = matchResult.Transaction + usedTxnIDs[consolidatedTxn.ID] = true + + h.logDebug("Matched single transaction", "order_id", order.GetID(), - "expected_amount", bankCharges[0]) - return result, nil + "transaction_id", consolidatedTxn.ID, + "amount", math.Abs(consolidatedTxn.Amount)) } + } - consolidatedTxn = matchResult.Transaction - usedTxnIDs[consolidatedTxn.ID] = true - - h.logDebug("Matched single transaction", - "order_id", order.GetID(), - "transaction_id", consolidatedTxn.ID, - "amount", math.Abs(consolidatedTxn.Amount)) + // Step 5: Consolidate multi-transaction matches + if consolidatedTxn == nil { + if len(matchedTxns) > 1 { + consolidationResult, err := h.consolidator.ConsolidateTransactions(ctx, matchedTxns, order, dryRun) + if err != nil { + return nil, fmt.Errorf("consolidation error: %w", err) + } + consolidatedTxn = consolidationResult.ConsolidatedTransaction + h.logInfo("Consolidated transactions", + "order_id", order.GetID(), + "consolidated_id", consolidatedTxn.ID, + "original_count", len(matchedTxns)) + } else if len(matchedTxns) == 1 { + consolidatedTxn = matchedTxns[0] + } } // Step 6: Pro-rata allocation - items := make([]allocator.Item, len(order.GetItems())) - for i, item := range order.GetItems() { + // For Monarch-discovered charges, use the order total and all items since we + // don't have per-shipment mapping. For scraper-validated charges, use the + // per-shipment breakdown when available. 
+ var chargeAmount float64 + var allocationTotal float64 + if monarchDiscovered { + chargeAmount = order.GetTotal() + for _, t := range matchedTxns { + allocationTotal += math.Abs(t.Amount) + } + } else { + chargeAmount = validation.BankChargesSum + if len(bankCharges) == 1 { + chargeAmount = bankCharges[0] + } + allocationTotal = validation.BankChargesSum + } + orderItems := order.GetItemsForCharge(chargeAmount) + items := make([]allocator.Item, len(orderItems)) + for i, item := range orderItems { items[i] = allocator.Item{ Name: item.GetName(), ListPrice: item.GetPrice(), } } - // Use the sum of bank charges as the order total for allocation - // This is the actual amount charged to the bank - allocationTotal := validation.BankChargesSum - allocResult, err := allocator.Allocate(items, allocationTotal) if err != nil { return nil, fmt.Errorf("allocation error: %w", err) @@ -289,12 +332,14 @@ func (h *AmazonHandler) ProcessOrder( h.logDebug("Allocated costs", "order_id", order.GetID(), "multiplier", allocResult.Multiplier, - "total_allocated", allocResult.TotalAllocated) + "total_allocated", allocResult.TotalAllocated, + "monarch_discovered", monarchDiscovered) - // Step 7: Create an allocated order for the splitter + // Step 7: Create an allocated order for the splitter using the per-shipment items allocatedOrder := &allocatedAmazonOrder{ Order: order, allocations: allocResult.Allocations, + baseItems: orderItems, } // Step 8: Categorize and create splits @@ -324,19 +369,32 @@ func (h *AmazonHandler) ProcessOrder( } if !dryRun { + reviewed := false params := &monarch.UpdateTransactionParams{ - CategoryID: &categoryID, - Notes: &notes, + Notes: &notes, + NeedsReview: &reviewed, + } + // Only set category if the LLM returned a valid Monarch category ID. + // An empty ID means the categorizer couldn't map to a known category — + // we still write notes so the order is recorded, but skip the category + // update to avoid a Monarch API error. 
+ if categoryID != "" { + params.CategoryID = &categoryID + } else { + h.logWarn("Skipping category update — no valid Monarch category ID returned by LLM", + "order_id", order.GetID(), + "transaction_id", consolidatedTxn.ID, + "category_name", result.CategoryName) } if err := h.monarch.UpdateTransaction(ctx, consolidatedTxn.ID, params); err != nil { return nil, fmt.Errorf("update transaction error: %w", err) } - h.logDebug("Updated transaction category", + h.logDebug("Updated transaction notes", "order_id", order.GetID(), "transaction_id", consolidatedTxn.ID, "category_id", categoryID) } else { - h.logDebug("[DRY RUN] Would update transaction category", + h.logDebug("[DRY RUN] Would update transaction", "order_id", order.GetID(), "category_id", categoryID) } @@ -346,6 +404,17 @@ func (h *AmazonHandler) ProcessOrder( if err := h.monarch.UpdateSplits(ctx, consolidatedTxn.ID, splits); err != nil { return nil, fmt.Errorf("update splits error: %w", err) } + // Mark the parent transaction as reviewed so Monarch's rule engine + // doesn't re-categorize it after the split is applied. 
+ reviewed := false + if err := h.monarch.UpdateTransaction(ctx, consolidatedTxn.ID, &monarch.UpdateTransactionParams{ + NeedsReview: &reviewed, + }); err != nil { + h.logWarn("Failed to mark split transaction as reviewed", + "order_id", order.GetID(), + "transaction_id", consolidatedTxn.ID, + "error", err) + } h.logDebug("Applied splits", "order_id", order.GetID(), "transaction_id", consolidatedTxn.ID, @@ -378,6 +447,7 @@ func (b *bankChargeOrder) GetTotal() float64 { type allocatedAmazonOrder struct { providers.Order allocations []allocator.Allocation + baseItems []providers.OrderItem // the per-shipment items used for allocation } // GetItems returns items with allocated prices diff --git a/internal/application/sync/handlers/amazon_test.go b/internal/application/sync/handlers/amazon_test.go index 7cbfebf..49b1f7e 100644 --- a/internal/application/sync/handlers/amazon_test.go +++ b/internal/application/sync/handlers/amazon_test.go @@ -52,6 +52,9 @@ func (m *mockAmazonOrder) GetNonBankAmount() (float64, error) { func (m *mockAmazonOrder) IsMultiDelivery() (bool, error) { return len(m.bankCharges) > 1, nil } +func (m *mockAmazonOrder) GetItemsForCharge(_ float64) []providers.OrderItem { + return m.items +} // mockItem implements providers.OrderItem type mockItem struct { @@ -201,7 +204,8 @@ func TestAmazonHandler_ProcessOrder_ValidOrder(t *testing.T) { } func TestAmazonHandler_ProcessOrder_InvalidCharges(t *testing.T) { - // Order with missing bank charge + // Order with missing bank charge and no Monarch transactions to discover from. + // The handler should attempt Monarch-side discovery, find nothing, and skip. 
order := &mockAmazonOrder{ id: "test-missing-charge", date: time.Now(), @@ -211,12 +215,13 @@ func TestAmazonHandler_ProcessOrder_InvalidCharges(t *testing.T) { nonBankAmount: 0, } - handler := NewAmazonHandler(nil, nil, nil, nil, nil) + matcherCfg := matcher.Config{AmountTolerance: 0.01, DateTolerance: 5} + handler := NewAmazonHandler(matcher.NewMatcher(matcherCfg), nil, nil, nil, nil) result, err := handler.ProcessOrder( context.Background(), order, - nil, + nil, // no Monarch transactions — discovery will find nothing make(map[string]bool), nil, nil, false, diff --git a/internal/domain/categorizer/categorizer.go b/internal/domain/categorizer/categorizer.go index a59b0b0..2a6377a 100644 --- a/internal/domain/categorizer/categorizer.go +++ b/internal/domain/categorizer/categorizer.go @@ -145,13 +145,41 @@ func (c *Categorizer) CategorizeItems(ctx context.Context, items []Item, categor return nil, fmt.Errorf("LLM categorization failed: %w", err) } + // Build a lookup so we can validate what the LLM returned + categoryByID := make(map[string]Category, len(categories)) + categoryByName := make(map[string]Category, len(categories)) + for _, c := range categories { + categoryByID[c.ID] = c + categoryByName[strings.ToLower(c.Name)] = c + } + + // Truncate extra entries — LLMs occasionally hallucinate more categorizations + // than items sent. Extra entries corrupt category-group detection downstream. + llmCategorizations := llmResult.Categorizations + if len(llmCategorizations) > len(uncachedItems) { + llmCategorizations = llmCategorizations[:len(uncachedItems)] + } + // Process LLM results - for _, cat := range llmResult.Categorizations { - // Cache the result - normalizedName := c.normalizeItemName(cat.ItemName) - c.cache.Set(normalizedName, cat.CategoryID) + for _, cat := range llmCategorizations { + // If the LLM returned an ID that isn't in the Monarch category list, + // try to recover via name match before falling back to empty. 
+ if _, ok := categoryByID[cat.CategoryID]; !ok { + if matched, ok := categoryByName[strings.ToLower(cat.CategoryName)]; ok { + cat.CategoryID = matched.ID + cat.CategoryName = matched.Name + } else { + // No valid match — zero out the ID so callers know to skip category update + cat.CategoryID = "" + } + } + + // Only cache valid IDs so future lookups don't reuse a bad value + if cat.CategoryID != "" { + normalizedName := c.normalizeItemName(cat.ItemName) + c.cache.Set(normalizedName, cat.CategoryID) + } - // Add to results result.Categorizations = append(result.Categorizations, cat) } @@ -262,31 +290,32 @@ func (c *Categorizer) buildPrompt(items []Item, categories []Category) string { categoriesList.WriteString(fmt.Sprintf("- %s (ID: %s)\n", cat.Name, cat.ID)) } - prompt := fmt.Sprintf(`Please categorize the following Walmart items into the most appropriate categories. + prompt := fmt.Sprintf(`Please categorize the following items into the most appropriate categories. Items to categorize: %s -Available categories: +Available categories (use ONLY these exact IDs): %s IMPORTANT Instructions: -1. Match each item to the MOST appropriate category -2. Distinguish between different types of items: +1. Match each item to the MOST appropriate category from the list above +2. You MUST use the exact category_id values shown in the list — do NOT invent IDs or use words like "Uncategorized" +3. If no category is a good fit, pick the closest one available +4. 
Distinguish between different types of items: - "Groceries" should be used ONLY for food items (milk, bread, meat, produce, snacks, beverages) - - "Home & Garden" should be used for cleaning supplies, paper products (paper towels, toilet paper), laundry detergent, trash bags, and home maintenance items - - "Personal Care" should be used for toiletries like shampoo, deodorant, toothpaste, soap, cosmetics + - "Home & Garden" for cleaning supplies, paper products, laundry, trash bags, home maintenance + - "Personal Care" for toiletries: shampoo, deodorant, toothpaste, soap, cosmetics - "Health & Wellness" for vitamins, medicine, first aid -3. Do NOT put non-food items in Groceries even if purchased at a grocery store -4. Consider the item name carefully - "paper towels" is Home & Garden, not Groceries -5. Provide a confidence score (0.0 to 1.0) for each categorization +5. Do NOT put non-food items in Groceries +6. Provide a confidence score (0.0 to 1.0) for each categorization Return the result as a JSON object with this structure: { "categorizations": [ { "item_name": "exact item name", - "category_id": "category ID", + "category_id": "exact ID from the list above", "category_name": "category name", "confidence": 0.95 } diff --git a/internal/domain/matcher/subset.go b/internal/domain/matcher/subset.go new file mode 100644 index 0000000..d6249be --- /dev/null +++ b/internal/domain/matcher/subset.go @@ -0,0 +1,106 @@ +package matcher + +import ( + "fmt" + "math" + + "github.com/eshaffer321/monarchmoney-go/pkg/monarch" + "github.com/eshaffer321/itemize/internal/adapters/providers" +) + +// subsetDateTolerance is wider than the normal match window because multi-shipment +// charges can post several days after the order date. +const subsetDateTolerance = 10 + +// FindSubsetByTotal finds a subset of Monarch transactions whose absolute amounts +// sum to the order total. 
Used as a fallback when the Amazon scraper cannot +// discover all bank charges from the order's transaction page (e.g. when +// subsequent shipment charges post after the scraper visited the order page). +// +// Only negative (purchase) transactions are considered; refunds/credits are +// excluded. Returns the matched transactions or an error if no valid subset +// is found. +func (m *Matcher) FindSubsetByTotal( + order providers.Order, + monarchTxns []*monarch.Transaction, + usedTxnIDs map[string]bool, +) ([]*monarch.Transaction, error) { + target := order.GetTotal() + if target <= 0 { + return nil, fmt.Errorf("order total must be positive") + } + + orderDate := order.GetDate() + + // Collect purchase candidates within the date window + var candidates []*monarch.Transaction + for _, txn := range monarchTxns { + if usedTxnIDs[txn.ID] { + continue + } + if txn.Amount >= 0 { // skip refunds/credits + continue + } + days := math.Abs(txn.Date.Time.Sub(orderDate).Hours() / 24) + if days > subsetDateTolerance { + continue + } + candidates = append(candidates, txn) + } + + // Brute-force subset search — n is always small (typically 1–5) + matches := subsetSummingTo(candidates, target, m.config.AmountTolerance) + if matches == nil { + return nil, fmt.Errorf("no combination of Monarch transactions sums to order total $%.2f", target) + } + return matches, nil +} + +// subsetSummingTo returns the smallest subset of txns whose absolute amounts +// sum to target within tolerance, or nil if none exists. 
+func subsetSummingTo(txns []*monarch.Transaction, target, tolerance float64) []*monarch.Transaction { + n := len(txns) + if n > 20 { + n = 20 // guard; 2^20 is ~1M — still fast, but cap for safety + } + + // Try subsets in increasing size order so we prefer fewer transactions + for size := 1; size <= n; size++ { + result := subsetOfSize(txns[:n], target, tolerance, 0, size, nil) + if result != nil { + return result + } + } + return nil +} + +// subsetOfSize is a recursive backtracking search for a subset of exactly `size` +// transactions summing to target. +func subsetOfSize( + txns []*monarch.Transaction, + target, tolerance float64, + start, remaining int, + current []*monarch.Transaction, +) []*monarch.Transaction { + if remaining == 0 { + sum := 0.0 + for _, t := range current { + sum += math.Abs(t.Amount) + } + if math.Abs(sum-target) <= tolerance { + result := make([]*monarch.Transaction, len(current)) + copy(result, current) + return result + } + return nil + } + + for i := start; i <= len(txns)-remaining; i++ { + found := subsetOfSize(txns, target, tolerance, i+1, remaining-1, + append(current, txns[i])) + if found != nil { + return found + } + } + return nil +} diff --git a/internal/domain/splitter/splitter.go b/internal/domain/splitter/splitter.go index 1e60f77..d9e0653 100644 --- a/internal/domain/splitter/splitter.go +++ b/internal/domain/splitter/splitter.go @@ -73,9 +73,14 @@ func (s *Splitter) CreateSplits( s.lastOrderID = order.GetID() } - // Group items by category to detect single vs multi-category + // Group items by category to detect single vs multi-category. + // Cap at len(items): if the LLM returned extra entries (hallucination), only + // the first N entries map to real items, and those are all that matter. categoryGroups := make(map[string]bool) - for _, cat := range result.Categorizations { + for i, cat := range result.Categorizations { + if i >= len(items) { + break + } categoryGroups[cat.CategoryID] = true }