From 7640e79c444df2c285d8e23e8bf716301c61518a Mon Sep 17 00:00:00 2001 From: Shivam Chaudhary Date: Mon, 25 May 2026 13:54:46 +0100 Subject: [PATCH] ice: backport XSK queue disable/enable fixes from upstream Linux Backport four upstream commits that fix race conditions during AF_XDP/XSK socket setup and teardown. These races cause false TX watchdog timeouts, NULL pointer dereferences, and workqueue deadlocks: - 99099c6bc75a ("ice: reorder disabling IRQ and NAPI in ice_qp_dis") - 405d9999aa0b ("ice: replace synchronize_rcu with synchronize_net") - 9da75a511c55 ("ice: toggle netif_carrier when setting up XSK pool") - 7e3b407ccbea ("ice: remove ICE_CFG_BUSY locking from AF_XDP code") All four fix defects introduced by the original AF_XDP support commit, 2d4238f55697 ("ice: Add support for AF_XDP"). The commits were cherry-picked from torvalds/linux but required manual adaptation because of API differences between upstream and the out-of-tree driver. Without this patch, hosts crash within 1-4 XSK lifecycle iterations under TX load. With the patch applied, hosts survive 200+ iterations under sustained outbound TX pressure (~200 Gbps). 
Tested on: - Intel E810 (PCI 8086:1592) - Kernel 5.10.252/253 (Amazon Linux 2) Signed-off-by: Shivam Chaudhary --- src/ice_xsk.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/ice_xsk.c b/src/ice_xsk.c index ccb8bdf..d83287e 100644 --- a/src/ice_xsk.c +++ b/src/ice_xsk.c @@ -61,7 +61,6 @@ static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) ice_clean_tx_ring(vsi->tx_rings[q_idx]); if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -170,7 +169,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_q_vector *q_vector; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; - int timeout = 50; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -180,15 +178,12 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(ICE_CFG_BUSY, vsi->back->state)) { - timeout--; - if (!timeout) - return -EBUSY; - usleep_range(1000, 2000); - } + synchronize_net(); + netif_carrier_off(vsi->netdev); netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); @@ -210,7 +205,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) #ifdef HAVE_XSK_BATCHED_RX_ALLOC ice_clean_rx_ring(rx_ring); #endif - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -230,6 +224,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) struct ice_q_vector *q_vector; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; + bool link_up; u16 size; int err; @@ -274,11 +269,14 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(ICE_CFG_BUSY, vsi->back->state); 
ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } free_buf: kfree(qg_buf); return err;