From b3299f85f132717dcbeeeaea8b085837076b373e Mon Sep 17 00:00:00 2001 From: Erick Shaffer Date: Fri, 26 Jun 2026 21:24:35 -0600 Subject: [PATCH 1/4] feat: per-shipment item assignment for Amazon multi-delivery orders Adds CLIShipment type and GetItemsForCharge() on the Amazon Order adapter. When the scraper provides shipment groupings, each Monarch transaction in a multi-delivery order is split using only the items that were in that box rather than pro-rating the entire order. Falls back to all items when shipment data is absent or the order has a single shipment. --- internal/adapters/providers/amazon/order.go | 53 +++++++++++++++++++ internal/adapters/providers/amazon/parser.go | 34 ++++++++++++ .../adapters/providers/amazon/provider.go | 3 +- internal/adapters/providers/amazon/types.go | 16 ++++++ internal/application/sync/handlers/amazon.go | 20 +++++-- .../application/sync/handlers/amazon_test.go | 3 ++ 6 files changed, 125 insertions(+), 4 deletions(-) diff --git a/internal/adapters/providers/amazon/order.go b/internal/adapters/providers/amazon/order.go index 4094b50..fd3cb58 100644 --- a/internal/adapters/providers/amazon/order.go +++ b/internal/adapters/providers/amazon/order.go @@ -3,6 +3,7 @@ package amazon import ( "errors" "log/slog" + "math" "time" "github.com/eshaffer321/itemize/internal/adapters/providers" @@ -175,6 +176,58 @@ func (o *Order) GetNonBankAmount() (float64, error) { return nonBankTotal, nil } +// GetItemsForCharge returns the items belonging to the shipment that best +// matches the given bank charge amount. When shipment data is available this +// lets each Monarch transaction be split using only the items that were +// actually in that box rather than pro-rating the entire order. +// +// Matching works by estimating each shipment's charge as: +// +// shipmentSubtotal * (1 + taxRate) +// +// where taxRate = order.Tax / order.Subtotal. The shipment whose estimate is +// closest to chargeAmount wins. 
Falls back to all order items when no +// shipment data is present or when the order has only one shipment. +func (o *Order) GetItemsForCharge(chargeAmount float64) []providers.OrderItem { + if len(o.parsedOrder.Shipments) <= 1 { + return o.items + } + + taxRate := 0.0 + if o.parsedOrder.Subtotal > 0 { + taxRate = o.parsedOrder.Tax / o.parsedOrder.Subtotal + } + + bestIdx := -1 + bestDiff := math.MaxFloat64 + for i, shipment := range o.parsedOrder.Shipments { + var subtotal float64 + for _, item := range shipment.Items { + subtotal += item.Price * float64(item.Quantity) + } + estimated := subtotal * (1 + taxRate) + diff := math.Abs(estimated - chargeAmount) + if diff < bestDiff { + bestDiff = diff + bestIdx = i + } + } + + if bestIdx < 0 { + return o.items + } + + shipment := o.parsedOrder.Shipments[bestIdx] + items := make([]providers.OrderItem, 0, len(shipment.Items)) + for _, item := range shipment.Items { + items = append(items, &OrderItem{parsedItem: item}) + } + if len(items) == 0 { + return o.items + } + return items +} + // IsMultiDelivery checks if order was split into multiple shipments/charges // Returns true if there are multiple final charges func (o *Order) IsMultiDelivery() (bool, error) { diff --git a/internal/adapters/providers/amazon/parser.go b/internal/adapters/providers/amazon/parser.go index e35e663..69d3cd2 100644 --- a/internal/adapters/providers/amazon/parser.go +++ b/internal/adapters/providers/amazon/parser.go @@ -80,6 +80,15 @@ func ConvertCLIOrder(cliOrder CLIOrder) (*ParsedOrder, error) { order.Items = append(order.Items, item) } + // Parse shipments + for i, cliShipment := range cliOrder.Shipments { + shipment, err := convertCLIShipment(cliShipment) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment %d: %w", i, err) + } + order.Shipments = append(order.Shipments, shipment) + } + // Parse transactions - fail on any transaction parse error for i, cliTx := range cliOrder.Transactions { tx, err := 
convertCLITransaction(cliTx) @@ -92,6 +101,31 @@ func ConvertCLIOrder(cliOrder CLIOrder) (*ParsedOrder, error) { return order, nil } +// convertCLIShipment converts a CLIShipment to a ParsedShipment +func convertCLIShipment(cliShipment CLIShipment) (*ParsedShipment, error) { + shipment := &ParsedShipment{ + Status: cliShipment.Status, + } + + if cliShipment.Date != "" { + date, err := parseDate(cliShipment.Date) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment date %q: %w", cliShipment.Date, err) + } + shipment.Date = date + } + + for i, cliItem := range cliShipment.Items { + item, err := convertCLIItem(cliItem) + if err != nil { + return nil, fmt.Errorf("failed to parse shipment item %d (%q): %w", i, cliItem.Name, err) + } + shipment.Items = append(shipment.Items, item) + } + + return shipment, nil +} + // convertCLIItem converts a CLIOrderItem to a ParsedOrderItem func convertCLIItem(cliItem CLIOrderItem) (*ParsedOrderItem, error) { price, err := parseAmount(cliItem.Price) diff --git a/internal/adapters/providers/amazon/provider.go b/internal/adapters/providers/amazon/provider.go index eaedf25..3ca6cf8 100644 --- a/internal/adapters/providers/amazon/provider.go +++ b/internal/adapters/providers/amazon/provider.go @@ -12,6 +12,7 @@ import ( "bytes" "context" "fmt" + "io" "log/slog" "os" "os/exec" @@ -228,7 +229,7 @@ func (p *Provider) executeCLI(ctx context.Context, args []string) ([]byte, error var stdout, stderr bytes.Buffer cmd.Stdout = &stdout - cmd.Stderr = &stderr + cmd.Stderr = io.MultiWriter(&stderr, os.Stderr) // stream logs to terminal in real-time if p.browserDataDir != "" { cmd.Env = append(os.Environ(), "BROWSER_DATA_DIR="+p.browserDataDir) } diff --git a/internal/adapters/providers/amazon/types.go b/internal/adapters/providers/amazon/types.go index c93537e..ee10dce 100644 --- a/internal/adapters/providers/amazon/types.go +++ b/internal/adapters/providers/amazon/types.go @@ -7,6 +7,13 @@ type CLIOutput struct { Orders []CLIOrder 
`json:"orders"` } +// CLIShipment represents a shipment group from the CLI output +type CLIShipment struct { + Status string `json:"status"` // "Delivered", "Arriving", "Shipped" + Date string `json:"date"` // ISO 8601: "2025-12-15" + Items []CLIOrderItem `json:"items"` +} + // CLIOrder represents an order from the CLI output type CLIOrder struct { OrderID string `json:"orderId"` @@ -16,6 +23,7 @@ type CLIOrder struct { Tax string `json:"tax"` // "$6.58" Shipping string `json:"shipping"` // "$0.00" Items []CLIOrderItem `json:"items"` + Shipments []CLIShipment `json:"shipments"` Transactions []CLITransaction `json:"transactions"` } @@ -35,6 +43,13 @@ type CLITransaction struct { Description string `json:"description"` // "Prime Visa ****1211..." } +// ParsedShipment is the internal representation of a shipment group +type ParsedShipment struct { + Status string + Date time.Time + Items []*ParsedOrderItem +} + // ParsedOrder is the internal representation after parsing CLI output type ParsedOrder struct { ID string @@ -44,6 +59,7 @@ type ParsedOrder struct { Tax float64 Shipping float64 Items []*ParsedOrderItem + Shipments []*ParsedShipment Transactions []*ParsedTransaction } diff --git a/internal/application/sync/handlers/amazon.go b/internal/application/sync/handlers/amazon.go index 9d038a0..2199f2f 100644 --- a/internal/application/sync/handlers/amazon.go +++ b/internal/application/sync/handlers/amazon.go @@ -25,6 +25,10 @@ type AmazonOrder interface { GetFinalCharges() ([]float64, error) GetNonBankAmount() (float64, error) IsMultiDelivery() (bool, error) + // GetItemsForCharge returns only the items that belong to the shipment + // matching the given charge amount. Falls back to all items when shipment + // data is unavailable or the order has a single shipment. 
+ GetItemsForCharge(chargeAmount float64) []providers.OrderItem } // TransactionConsolidator consolidates multiple transactions into one @@ -267,8 +271,16 @@ func (h *AmazonHandler) ProcessOrder( } // Step 6: Pro-rata allocation - items := make([]allocator.Item, len(order.GetItems())) - for i, item := range order.GetItems() { + // For multi-delivery orders, use only the items from the shipment that + // corresponds to this charge so each Monarch transaction is split correctly. + // For single-charge orders GetItemsForCharge returns all items. + chargeAmount := validation.BankChargesSum + if len(bankCharges) == 1 { + chargeAmount = bankCharges[0] + } + orderItems := order.GetItemsForCharge(chargeAmount) + items := make([]allocator.Item, len(orderItems)) + for i, item := range orderItems { items[i] = allocator.Item{ Name: item.GetName(), ListPrice: item.GetPrice(), @@ -291,10 +303,11 @@ func (h *AmazonHandler) ProcessOrder( "multiplier", allocResult.Multiplier, "total_allocated", allocResult.TotalAllocated) - // Step 7: Create an allocated order for the splitter + // Step 7: Create an allocated order for the splitter using the per-shipment items allocatedOrder := &allocatedAmazonOrder{ Order: order, allocations: allocResult.Allocations, + baseItems: orderItems, } // Step 8: Categorize and create splits @@ -378,6 +391,7 @@ func (b *bankChargeOrder) GetTotal() float64 { type allocatedAmazonOrder struct { providers.Order allocations []allocator.Allocation + baseItems []providers.OrderItem // the per-shipment items used for allocation } // GetItems returns items with allocated prices diff --git a/internal/application/sync/handlers/amazon_test.go b/internal/application/sync/handlers/amazon_test.go index 7cbfebf..c9158fb 100644 --- a/internal/application/sync/handlers/amazon_test.go +++ b/internal/application/sync/handlers/amazon_test.go @@ -52,6 +52,9 @@ func (m *mockAmazonOrder) GetNonBankAmount() (float64, error) { func (m *mockAmazonOrder) IsMultiDelivery() (bool, 
error) { return len(m.bankCharges) > 1, nil } +func (m *mockAmazonOrder) GetItemsForCharge(_ float64) []providers.OrderItem { + return m.items +} // mockItem implements providers.OrderItem type mockItem struct { From 981d156d075a102606fbc286d9b74efd296890d4 Mon Sep 17 00:00:00 2001 From: Erick Shaffer Date: Fri, 26 Jun 2026 22:02:19 -0600 Subject: [PATCH 2/4] fix: validate LLM category IDs and guard against hallucinated entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs fixed: 1. LLM returns invalid category ID (e.g. "Uncategorized") instead of a Monarch numeric ID → Monarch API rejects the update. Fix: validate each returned ID against the loaded category list; fall back to name match, then empty string if unresolvable. 2. When category ID is empty (unresolvable), the handler still tried to set it on the Monarch transaction, causing an API failure. Fix: skip the CategoryID field in UpdateTransactionParams when the ID is empty so we still write notes but don't fail the order. 3. LLM occasionally returns extra categorization entries (hallucination) for orders with fewer items. The extra entries inflated categoryGroups to size >1, sending a single-item order through the splits path with only 1 split → Monarch "must be split into two or more" error. Fix: truncate LLM results to len(uncachedItems) in CategorizeItems, and cap the categoryGroups loop in CreateSplits to len(items). Also updated the categorizer prompt to remove the Walmart-specific framing and explicitly instruct the model to use exact IDs from the list. 
--- internal/application/sync/handlers/amazon.go | 19 +++++-- internal/domain/categorizer/categorizer.go | 59 +++++++++++++++----- internal/domain/splitter/splitter.go | 9 ++- 3 files changed, 66 insertions(+), 21 deletions(-) diff --git a/internal/application/sync/handlers/amazon.go b/internal/application/sync/handlers/amazon.go index 2199f2f..a47d2ec 100644 --- a/internal/application/sync/handlers/amazon.go +++ b/internal/application/sync/handlers/amazon.go @@ -338,18 +338,29 @@ func (h *AmazonHandler) ProcessOrder( if !dryRun { params := &monarch.UpdateTransactionParams{ - CategoryID: &categoryID, - Notes: &notes, + Notes: &notes, + } + // Only set category if the LLM returned a valid Monarch category ID. + // An empty ID means the categorizer couldn't map to a known category — + // we still write notes so the order is recorded, but skip the category + // update to avoid a Monarch API error. + if categoryID != "" { + params.CategoryID = &categoryID + } else { + h.logWarn("Skipping category update — no valid Monarch category ID returned by LLM", + "order_id", order.GetID(), + "transaction_id", consolidatedTxn.ID, + "category_name", result.CategoryName) } if err := h.monarch.UpdateTransaction(ctx, consolidatedTxn.ID, params); err != nil { return nil, fmt.Errorf("update transaction error: %w", err) } - h.logDebug("Updated transaction category", + h.logDebug("Updated transaction notes", "order_id", order.GetID(), "transaction_id", consolidatedTxn.ID, "category_id", categoryID) } else { - h.logDebug("[DRY RUN] Would update transaction category", + h.logDebug("[DRY RUN] Would update transaction", "order_id", order.GetID(), "category_id", categoryID) } diff --git a/internal/domain/categorizer/categorizer.go b/internal/domain/categorizer/categorizer.go index a59b0b0..2a6377a 100644 --- a/internal/domain/categorizer/categorizer.go +++ b/internal/domain/categorizer/categorizer.go @@ -145,13 +145,41 @@ func (c *Categorizer) CategorizeItems(ctx context.Context, items []Item,
categor return nil, fmt.Errorf("LLM categorization failed: %w", err) } + // Build a lookup so we can validate what the LLM returned + categoryByID := make(map[string]Category, len(categories)) + categoryByName := make(map[string]Category, len(categories)) + for _, c := range categories { + categoryByID[c.ID] = c + categoryByName[strings.ToLower(c.Name)] = c + } + + // Truncate extra entries — LLMs occasionally hallucinate more categorizations + // than items sent. Extra entries corrupt category-group detection downstream. + llmCategorizations := llmResult.Categorizations + if len(llmCategorizations) > len(uncachedItems) { + llmCategorizations = llmCategorizations[:len(uncachedItems)] + } + // Process LLM results - for _, cat := range llmResult.Categorizations { - // Cache the result - normalizedName := c.normalizeItemName(cat.ItemName) - c.cache.Set(normalizedName, cat.CategoryID) + for _, cat := range llmCategorizations { + // If the LLM returned an ID that isn't in the Monarch category list, + // try to recover via name match before falling back to empty. + if _, ok := categoryByID[cat.CategoryID]; !ok { + if matched, ok := categoryByName[strings.ToLower(cat.CategoryName)]; ok { + cat.CategoryID = matched.ID + cat.CategoryName = matched.Name + } else { + // No valid match — zero out the ID so callers know to skip category update + cat.CategoryID = "" + } + } + + // Only cache valid IDs so future lookups don't reuse a bad value + if cat.CategoryID != "" { + normalizedName := c.normalizeItemName(cat.ItemName) + c.cache.Set(normalizedName, cat.CategoryID) + } - // Add to results result.Categorizations = append(result.Categorizations, cat) } @@ -262,31 +290,32 @@ func (c *Categorizer) buildPrompt(items []Item, categories []Category) string { categoriesList.WriteString(fmt.Sprintf("- %s (ID: %s)\n", cat.Name, cat.ID)) } - prompt := fmt.Sprintf(`Please categorize the following Walmart items into the most appropriate categories. 
+ prompt := fmt.Sprintf(`Please categorize the following items into the most appropriate categories. Items to categorize: %s -Available categories: +Available categories (use ONLY these exact IDs): %s IMPORTANT Instructions: -1. Match each item to the MOST appropriate category -2. Distinguish between different types of items: +1. Match each item to the MOST appropriate category from the list above +2. You MUST use the exact category_id values shown in the list — do NOT invent IDs or use words like "Uncategorized" +3. If no category is a good fit, pick the closest one available +4. Distinguish between different types of items: - "Groceries" should be used ONLY for food items (milk, bread, meat, produce, snacks, beverages) - - "Home & Garden" should be used for cleaning supplies, paper products (paper towels, toilet paper), laundry detergent, trash bags, and home maintenance items - - "Personal Care" should be used for toiletries like shampoo, deodorant, toothpaste, soap, cosmetics + - "Home & Garden" for cleaning supplies, paper products, laundry, trash bags, home maintenance + - "Personal Care" for toiletries: shampoo, deodorant, toothpaste, soap, cosmetics - "Health & Wellness" for vitamins, medicine, first aid -3. Do NOT put non-food items in Groceries even if purchased at a grocery store -4. Consider the item name carefully - "paper towels" is Home & Garden, not Groceries -5. Provide a confidence score (0.0 to 1.0) for each categorization +5. Do NOT put non-food items in Groceries +6. 
Provide a confidence score (0.0 to 1.0) for each categorization Return the result as a JSON object with this structure: { "categorizations": [ { "item_name": "exact item name", - "category_id": "category ID", + "category_id": "exact ID from the list above", "category_name": "category name", "confidence": 0.95 } diff --git a/internal/domain/splitter/splitter.go b/internal/domain/splitter/splitter.go index 1e60f77..d9e0653 100644 --- a/internal/domain/splitter/splitter.go +++ b/internal/domain/splitter/splitter.go @@ -73,9 +73,14 @@ func (s *Splitter) CreateSplits( s.lastOrderID = order.GetID() } - // Group items by category to detect single vs multi-category + // Group items by category to detect single vs multi-category. + // Cap at len(items): if the LLM returned extra entries (hallucination), only + // the first N entries map to real items, and those are all that matter. categoryGroups := make(map[string]bool) - for _, cat := range result.Categorizations { + for i, cat := range result.Categorizations { + if i >= len(items) { + break + } categoryGroups[cat.CategoryID] = true } From 6b8f5cc20ca783f1792adf74c865b8f5380d2977 Mon Sep 17 00:00:00 2001 From: Erick Shaffer Date: Sat, 27 Jun 2026 06:55:43 -0600 Subject: [PATCH 3/4] fix: mark transactions as reviewed to prevent Monarch rule re-categorization After setting category/splits via the Monarch API, transactions left in needsReview=true state are re-processed by Monarch's rule engine, which resets the category back to the rule's target (e.g. [TEMP] Amazon). Fix: set NeedsReview=false on UpdateTransaction for single-category updates. For split updates, call a follow-up UpdateTransaction after UpdateSplits to clear the review flag, since updateTransactionSplit doesn't expose needsReview directly. 
--- internal/application/sync/handlers/amazon.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/internal/application/sync/handlers/amazon.go b/internal/application/sync/handlers/amazon.go index a47d2ec..222a522 100644 --- a/internal/application/sync/handlers/amazon.go +++ b/internal/application/sync/handlers/amazon.go @@ -337,8 +337,10 @@ func (h *AmazonHandler) ProcessOrder( } if !dryRun { + reviewed := false params := &monarch.UpdateTransactionParams{ - Notes: &notes, + Notes: &notes, + NeedsReview: &reviewed, } // Only set category if the LLM returned a valid Monarch category ID. // An empty ID means the categorizer couldn't map to a known category — @@ -370,6 +372,17 @@ func (h *AmazonHandler) ProcessOrder( if err := h.monarch.UpdateSplits(ctx, consolidatedTxn.ID, splits); err != nil { return nil, fmt.Errorf("update splits error: %w", err) } + // Mark the parent transaction as reviewed so Monarch's rule engine + // doesn't re-categorize it after the split is applied. + reviewed := false + if err := h.monarch.UpdateTransaction(ctx, consolidatedTxn.ID, &monarch.UpdateTransactionParams{ + NeedsReview: &reviewed, + }); err != nil { + h.logWarn("Failed to mark split transaction as reviewed", + "order_id", order.GetID(), + "transaction_id", consolidatedTxn.ID, + "error", err) + } h.logDebug("Applied splits", "order_id", order.GetID(), "transaction_id", consolidatedTxn.ID, From 6db29cd0c6682b89ea1dac387f003b6d4a6b1804 Mon Sep 17 00:00:00 2001 From: Erick Shaffer Date: Sat, 27 Jun 2026 21:10:10 -0600 Subject: [PATCH 4/4] fix: Monarch-side subset discovery fallback for incomplete scraper charges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Amazon scraper returns charges that don't sum to the order total (e.g. a shipment charge posted after the scraper visited the page), try to find a matching set of Monarch transactions via subset-sum search before giving up.
Uses a 10-day date window and recursive backtracking over the small candidate set (typically 1–5 transactions). Also fixes a nil matcher panic in TestAmazonHandler_ProcessOrder_InvalidCharges — the test now supplies a real matcher since the handler calls FindSubsetByTotal on validation failure. --- internal/application/sync/handlers/amazon.go | 196 ++++++++++-------- .../application/sync/handlers/amazon_test.go | 8 +- internal/domain/matcher/subset.go | 106 ++++++++++ 3 files changed, 225 insertions(+), 85 deletions(-) create mode 100644 internal/domain/matcher/subset.go diff --git a/internal/application/sync/handlers/amazon.go b/internal/application/sync/handlers/amazon.go index 222a522..b3697fe 100644 --- a/internal/application/sync/handlers/amazon.go +++ b/internal/application/sync/handlers/amazon.go @@ -178,105 +178,140 @@ func (h *AmazonHandler) ProcessOrder( // Step 3: Validate charges validation := validator.ValidateCharges(bankCharges, order.GetTotal(), nonBankAmount) - if !validation.Valid { - h.logWarn("Charge validation failed", - "order_id", order.GetID(), - "reason", validation.Reason, - "bank_sum", validation.BankChargesSum, - "expected", validation.ExpectedSum, - "difference", validation.Difference) - result.Skipped = true - result.SkipReason = validation.Reason - return result, nil - } - - h.logDebug("Charge validation passed", - "order_id", order.GetID(), - "bank_sum", validation.BankChargesSum, - "expected", validation.ExpectedSum) // Step 4: Match to Monarch transactions var matchedTxns []*monarch.Transaction var consolidatedTxn *monarch.Transaction + monarchDiscovered := false // true when we matched via subset search rather than scraper charges - if len(bankCharges) > 1 { - // Multi-delivery order - find multiple matches - multiResult, err := h.matcher.FindMultipleMatches(order, monarchTxns, usedTxnIDs, bankCharges) - if err != nil { - return nil, fmt.Errorf("multi-match error: %w", err) - } + if !validation.Valid { + // Scraper charges are 
incomplete (common for multi-shipment orders where later + // charges post after the scraper visited the order details page). Try to find the + // matching Monarch transactions by searching for a subset that sums to the order total. + h.logDebug("Scraper charges incomplete, attempting Monarch-side discovery", + "order_id", order.GetID(), + "scraper_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum) - if !multiResult.AllFound { - result.Skipped = true - result.SkipReason = fmt.Sprintf("could not find all transactions: expected %d, found %d", - len(bankCharges), len(multiResult.Matches)) - h.logWarn("Not all transactions found", + discovered, discoverErr := h.matcher.FindSubsetByTotal(order, monarchTxns, usedTxnIDs) + if discoverErr != nil { + h.logWarn("Charge validation failed and Monarch discovery found no match", "order_id", order.GetID(), - "expected", len(bankCharges), - "found", len(multiResult.Matches)) + "reason", validation.Reason, + "bank_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum, + "difference", validation.Difference) + result.Skipped = true + result.SkipReason = validation.Reason return result, nil } - // Extract matched transactions - for _, match := range multiResult.Matches { - matchedTxns = append(matchedTxns, match.Transaction) - usedTxnIDs[match.Transaction.ID] = true + matchedTxns = discovered + monarchDiscovered = true + for _, t := range matchedTxns { + usedTxnIDs[t.ID] = true } - - h.logInfo("Matched all transactions for multi-delivery order", + h.logInfo("Monarch-side discovery found matching transactions", "order_id", order.GetID(), - "transaction_count", len(matchedTxns)) + "count", len(matchedTxns)) + } else { + h.logDebug("Charge validation passed", + "order_id", order.GetID(), + "bank_sum", validation.BankChargesSum, + "expected", validation.ExpectedSum) - // Step 5: Consolidate transactions - consolidationResult, err := h.consolidator.ConsolidateTransactions(ctx, matchedTxns, order, dryRun) 
- if err != nil { - return nil, fmt.Errorf("consolidation error: %w", err) - } - consolidatedTxn = consolidationResult.ConsolidatedTransaction + if len(bankCharges) > 1 { + // Multi-delivery order - find multiple matches + multiResult, err := h.matcher.FindMultipleMatches(order, monarchTxns, usedTxnIDs, bankCharges) + if err != nil { + return nil, fmt.Errorf("multi-match error: %w", err) + } - h.logInfo("Consolidated transactions", - "order_id", order.GetID(), - "consolidated_id", consolidatedTxn.ID, - "original_count", len(matchedTxns)) - } else { - // Single charge - find one match - // Use a wrapper order that returns the bank charge amount for matching - // This handles gift card orders where order total differs from bank charge - matchOrder := &bankChargeOrder{ - Order: order, - bankCharge: bankCharges[0], - } + if !multiResult.AllFound { + result.Skipped = true + result.SkipReason = fmt.Sprintf("could not find all transactions: expected %d, found %d", + len(bankCharges), len(multiResult.Matches)) + h.logWarn("Not all transactions found", + "order_id", order.GetID(), + "expected", len(bankCharges), + "found", len(multiResult.Matches)) + return result, nil + } - matchResult, err := h.matcher.FindMatch(matchOrder, monarchTxns, usedTxnIDs) - if err != nil { - return nil, fmt.Errorf("match error: %w", err) - } + for _, match := range multiResult.Matches { + matchedTxns = append(matchedTxns, match.Transaction) + usedTxnIDs[match.Transaction.ID] = true + } + h.logInfo("Matched all transactions for multi-delivery order", + "order_id", order.GetID(), + "transaction_count", len(matchedTxns)) + } else { + // Single charge - find one match + // Use a wrapper order that returns the bank charge amount for matching + // This handles gift card orders where order total differs from bank charge + matchOrder := &bankChargeOrder{ + Order: order, + bankCharge: bankCharges[0], + } - if matchResult == nil { - result.Skipped = true - result.SkipReason = "no matching transaction 
found" - h.logWarn("No matching transaction found", + matchResult, err := h.matcher.FindMatch(matchOrder, monarchTxns, usedTxnIDs) + if err != nil { + return nil, fmt.Errorf("match error: %w", err) + } + + if matchResult == nil { + result.Skipped = true + result.SkipReason = "no matching transaction found" + h.logWarn("No matching transaction found", + "order_id", order.GetID(), + "expected_amount", bankCharges[0]) + return result, nil + } + + consolidatedTxn = matchResult.Transaction + usedTxnIDs[consolidatedTxn.ID] = true + + h.logDebug("Matched single transaction", "order_id", order.GetID(), - "expected_amount", bankCharges[0]) - return result, nil + "transaction_id", consolidatedTxn.ID, + "amount", math.Abs(consolidatedTxn.Amount)) } + } - consolidatedTxn = matchResult.Transaction - usedTxnIDs[consolidatedTxn.ID] = true - - h.logDebug("Matched single transaction", - "order_id", order.GetID(), - "transaction_id", consolidatedTxn.ID, - "amount", math.Abs(consolidatedTxn.Amount)) + // Step 5: Consolidate multi-transaction matches + if consolidatedTxn == nil { + if len(matchedTxns) > 1 { + consolidationResult, err := h.consolidator.ConsolidateTransactions(ctx, matchedTxns, order, dryRun) + if err != nil { + return nil, fmt.Errorf("consolidation error: %w", err) + } + consolidatedTxn = consolidationResult.ConsolidatedTransaction + h.logInfo("Consolidated transactions", + "order_id", order.GetID(), + "consolidated_id", consolidatedTxn.ID, + "original_count", len(matchedTxns)) + } else if len(matchedTxns) == 1 { + consolidatedTxn = matchedTxns[0] + } } // Step 6: Pro-rata allocation - // For multi-delivery orders, use only the items from the shipment that - // corresponds to this charge so each Monarch transaction is split correctly. - // For single-charge orders GetItemsForCharge returns all items. 
- chargeAmount := validation.BankChargesSum - if len(bankCharges) == 1 { - chargeAmount = bankCharges[0] + // For Monarch-discovered charges, use the order total and all items since we + // don't have per-shipment mapping. For scraper-validated charges, use the + // per-shipment breakdown when available. + var chargeAmount float64 + var allocationTotal float64 + if monarchDiscovered { + chargeAmount = order.GetTotal() + for _, t := range matchedTxns { + allocationTotal += math.Abs(t.Amount) + } + } else { + chargeAmount = validation.BankChargesSum + if len(bankCharges) == 1 { + chargeAmount = bankCharges[0] + } + allocationTotal = validation.BankChargesSum } orderItems := order.GetItemsForCharge(chargeAmount) items := make([]allocator.Item, len(orderItems)) @@ -287,10 +322,6 @@ func (h *AmazonHandler) ProcessOrder( } } - // Use the sum of bank charges as the order total for allocation - // This is the actual amount charged to the bank - allocationTotal := validation.BankChargesSum - allocResult, err := allocator.Allocate(items, allocationTotal) if err != nil { return nil, fmt.Errorf("allocation error: %w", err) @@ -301,7 +332,8 @@ func (h *AmazonHandler) ProcessOrder( h.logDebug("Allocated costs", "order_id", order.GetID(), "multiplier", allocResult.Multiplier, - "total_allocated", allocResult.TotalAllocated) + "total_allocated", allocResult.TotalAllocated, + "monarch_discovered", monarchDiscovered) // Step 7: Create an allocated order for the splitter using the per-shipment items allocatedOrder := &allocatedAmazonOrder{ diff --git a/internal/application/sync/handlers/amazon_test.go b/internal/application/sync/handlers/amazon_test.go index c9158fb..49b1f7e 100644 --- a/internal/application/sync/handlers/amazon_test.go +++ b/internal/application/sync/handlers/amazon_test.go @@ -204,7 +204,8 @@ func TestAmazonHandler_ProcessOrder_ValidOrder(t *testing.T) { } func TestAmazonHandler_ProcessOrder_InvalidCharges(t *testing.T) { - // Order with missing bank charge + // 
Order with missing bank charge and no Monarch transactions to discover from. + // The handler should attempt Monarch-side discovery, find nothing, and skip. order := &mockAmazonOrder{ id: "test-missing-charge", date: time.Now(), @@ -214,12 +215,13 @@ func TestAmazonHandler_ProcessOrder_InvalidCharges(t *testing.T) { nonBankAmount: 0, } - handler := NewAmazonHandler(nil, nil, nil, nil, nil) + matcherCfg := matcher.Config{AmountTolerance: 0.01, DateTolerance: 5} + handler := NewAmazonHandler(matcher.NewMatcher(matcherCfg), nil, nil, nil, nil) result, err := handler.ProcessOrder( context.Background(), order, - nil, + nil, // no Monarch transactions — discovery will find nothing make(map[string]bool), nil, nil, false, diff --git a/internal/domain/matcher/subset.go b/internal/domain/matcher/subset.go new file mode 100644 index 0000000..d6249be --- /dev/null +++ b/internal/domain/matcher/subset.go @@ -0,0 +1,106 @@ +package matcher + +import ( + "fmt" + "math" + + "github.com/eshaffer321/monarchmoney-go/pkg/monarch" + "github.com/eshaffer321/itemize/internal/adapters/providers" +) + +// subsetDateTolerance is wider than the normal match window because multi-shipment +// charges can post several days after the order date. +const subsetDateTolerance = 10 + +// FindSubsetByTotal finds a subset of Monarch transactions whose absolute amounts +// sum to the order total. Used as a fallback when the Amazon scraper cannot +// discover all bank charges from the order's transaction page (e.g. when +// subsequent shipment charges post after the scraper visited the order page). +// +// Only negative (purchase) transactions are considered; refunds/credits are +// excluded. Returns the matched transactions or an error if no valid subset +// is found. 
+func (m *Matcher) FindSubsetByTotal( + order providers.Order, + monarchTxns []*monarch.Transaction, + usedTxnIDs map[string]bool, +) ([]*monarch.Transaction, error) { + target := order.GetTotal() + if target <= 0 { + return nil, fmt.Errorf("order total must be positive") + } + + orderDate := order.GetDate() + + // Collect purchase candidates within the date window + var candidates []*monarch.Transaction + for _, txn := range monarchTxns { + if usedTxnIDs[txn.ID] { + continue + } + if txn.Amount >= 0 { // skip refunds/credits + continue + } + days := math.Abs(txn.Date.Time.Sub(orderDate).Hours() / 24) + if days > subsetDateTolerance { + continue + } + candidates = append(candidates, txn) + } + + // Brute-force subset search — n is always small (typically 1–5) + matches := subsetSummingTo(candidates, target, m.config.AmountTolerance) + if matches == nil { + return nil, fmt.Errorf("no combination of Monarch transactions sums to order total $%.2f", target) + } + return matches, nil +} + +// subsetSummingTo returns the smallest subset of txns whose absolute amounts +// sum to target within tolerance, or nil if none exists. +func subsetSummingTo(txns []*monarch.Transaction, target, tolerance float64) []*monarch.Transaction { + n := len(txns) + if n > 20 { + n = 20 // guard; 2^20 is ~1M — still fast, but cap for safety + } + + // Try subsets in increasing size order so we prefer fewer transactions + for size := 1; size <= n; size++ { + result := subsetOfSize(txns[:n], target, tolerance, 0, size, nil) + if result != nil { + return result + } + } + return nil +} + +// subsetOfSize is a recursive backtracking search for a subset of exactly `size` +// transactions summing to target. 
+func subsetOfSize( + txns []*monarch.Transaction, + target, tolerance float64, + start, remaining int, + current []*monarch.Transaction, +) []*monarch.Transaction { + if remaining == 0 { + sum := 0.0 + for _, t := range current { + sum += math.Abs(t.Amount) + } + if math.Abs(sum-target) <= tolerance { + result := make([]*monarch.Transaction, len(current)) + copy(result, current) + return result + } + return nil + } + + for i := start; i <= len(txns)-remaining; i++ { + found := subsetOfSize(txns, target, tolerance, i+1, remaining-1, + append(current, txns[i])) + if found != nil { + return found + } + } + return nil +}