From c7ea2716ebc46eb9cab0f4a60fdfc86aad49f592 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 8 Apr 2026 12:27:29 +0200 Subject: [PATCH 01/13] [WiP/TMP] ixgbevf: get and set channels via ethtool num_online_cpus() is not a good goal for number of VF queues. We only have 3 interrupts of which 1 is used for mbx, so effectively we have 2 channels max, only 2 CPUs are used. More queue pairs are still useful for AF_XDP/TCs. Good idea to default to 2 queue pairs. Another option is to limit Rx queue number to num_online_cpus(), and hence always have possible XDP without TCs. Handling of queue number when dealing with TCs has to be updated too: * how does VF know, if the configuration is a result of DCB, and hence incompatible with XDP vs GDV TCs * we should not limit number of Tx queues to 1 when there are TCs, Rx queue number also doesn't have to be equal to TC, as LKV allows 2 queues per TC, and this makes RSS sense Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 71 +++++++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 13 ++ .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 148 ++++++++++-------- 3 files changed, 168 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 274eef39c58618..387eae14ed44d7 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -916,6 +916,75 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, return err; } +static void ixgbevf_get_reported_q_num(u32 rx, u32 tx, u32 *dst_combined, + u32 *dst_rx, u32 *dst_tx) +{ + rx = min_t(u32, rx, num_online_cpus()); + *dst_combined = min_t(u32, rx, tx); + if (rx > tx) { + *dst_rx = rx - tx; + *dst_tx = 0; + } else { + *dst_tx = tx - rx; + *dst_rx = 0; + } +} + +static void ixgbevf_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct ixgbevf_adapter *adapter = 
netdev_priv(netdev); + + ixgbevf_get_reported_q_num(adapter->num_rx_queues, + adapter->num_tx_queues, + &ch->combined_count, &ch->rx_count, + &ch->tx_count); + + ixgbevf_get_reported_q_num(adapter->q_caps.max_rxqs, + adapter->q_caps.max_txqs, + &ch->max_combined, &ch->max_rx, + &ch->max_tx); + + ch->max_other = NON_Q_VECTORS; + ch->other_count = NON_Q_VECTORS; +} + +static int ixgbevf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + u32 num_req = ch->combined_count; + bool was_up; + int err = 0; + + /* Do not change queue number if DCB is enabled */ + if (adapter->q_caps.num_tcs > 1) + return -EOPNOTSUPP; + + if (ch->rx_count || ch->tx_count || ch->other_count != NON_Q_VECTORS) + return -EINVAL; + + if (num_req == adapter->num_rx_queues && + num_req == adapter->num_tx_queues) + return 0; + + adapter->num_req_qpairs = num_req; + + was_up = netif_running(netdev); + if (was_up) + ixgbevf_close(netdev); + + ixgbevf_clear_interrupt_scheme(adapter); + err = ixgbevf_init_interrupt_scheme(adapter); + if (err) + return err; + + if (was_up) + ixgbevf_open(netdev); + + return 0; +} + static const struct ethtool_ops ixgbevf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, @@ -938,6 +1007,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, + .get_channels = ixgbevf_get_channels, + .set_channels = ixgbevf_set_channels, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index d8f841515ca62a..3c3ba709e52ba6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -270,6 +270,15 @@ static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, 
u32 value) #define IXGBEVF_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +struct ixgbevf_q_caps { + u32 min_rxqs; + u32 max_rxqs; + u32 min_txqs; + u32 max_txqs; + u32 num_tcs; + u32 def_tc; +}; + /* board specific private data structure */ struct ixgbevf_adapter { /* this field must be first, see ixgbevf_process_skb_fields */ @@ -346,6 +355,8 @@ struct ixgbevf_adapter { u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; u32 flags; bool link_state; + struct ixgbevf_q_caps q_caps; + u32 num_req_qpairs; #ifdef CONFIG_XFRM struct ixgbevf_ipsec *ipsec; @@ -432,6 +443,8 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *); void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring); void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring); void ixgbevf_update_stats(struct ixgbevf_adapter *adapter); +int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter); +void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter); int ethtool_ioctl(struct ifreq *ifr); extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3006d6dae7147d..b17c53a00b640e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1814,47 +1814,31 @@ static void ixgbevf_napi_disable_all(struct ixgbevf_adapter *adapter) } } -static int ixgbevf_configure_dcb(struct ixgbevf_adapter *adapter) +static void ixgbevf_configure_dcb(struct ixgbevf_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; - unsigned int def_q = 0; - unsigned int num_tcs = 0; unsigned int num_rx_queues = adapter->num_rx_queues; unsigned int num_tx_queues = adapter->num_tx_queues; - int err; - - spin_lock_bh(&adapter->mbx_lock); - - /* fetch queue configuration from the PF */ - err = ixgbevf_get_queues(hw, &num_tcs, &def_q); - - spin_unlock_bh(&adapter->mbx_lock); - if (err) - return err; - - if (num_tcs > 
1) { + if (adapter->q_caps.num_tcs > 1) { /* we need only one Tx queue */ num_tx_queues = 1; /* update default Tx ring register index */ - adapter->tx_ring[0]->reg_idx = def_q; + adapter->tx_ring[0]->reg_idx = adapter->q_caps.def_tc; /* we need as many queues as traffic classes */ - num_rx_queues = num_tcs; + num_rx_queues = adapter->q_caps.num_tcs; } /* if we have a bad config abort request queue reset */ if ((adapter->num_rx_queues != num_rx_queues) || (adapter->num_tx_queues != num_tx_queues)) { /* force mailbox timeout to prevent further messages */ - hw->mbx.timeout = 0; + adapter->hw.mbx.timeout = 0; /* wait for watchdog to come around and bail us out */ set_bit(__IXGBEVF_QUEUE_RESET_REQUESTED, &adapter->state); } - - return 0; } static void ixgbevf_configure(struct ixgbevf_adapter *adapter) @@ -2254,6 +2238,60 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter, return 0; } +static void ixgbevf_cfg_queue_caps(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 max_qpairs, rss; + int err; + + adapter->q_caps = (typeof(adapter->q_caps)) { + .min_rxqs = 1, + .min_txqs = 1, + .max_rxqs = 1, + .max_txqs = 1, + }; + + /* fetch queue configuration from the PF */ + spin_lock_bh(&adapter->mbx_lock); + err = ixgbevf_get_queues(hw, &adapter->q_caps.num_tcs, + &adapter->q_caps.def_tc); + spin_unlock_bh(&adapter->mbx_lock); + if (err) + return; + + /* we need as many queues as traffic classes */ + if (adapter->q_caps.num_tcs > 1) { + adapter->q_caps.min_rxqs = adapter->q_caps.num_tcs; + adapter->q_caps.max_rxqs = adapter->q_caps.num_tcs; + return; + } + + switch (hw->api_version) { + case ixgbe_mbox_api_11: + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: + max_qpairs = min_t(u32, hw->mac.max_rx_queues, + hw->mac.max_tx_queues); + if (adapter->xdp_prog) + max_qpairs = min_t(u32, max_qpairs, + hw->mac.max_tx_queues / 
2); + break; + default: + max_qpairs = adapter->xdp_prog ? IXGBEVF_MAX_RSS_QUEUES / 2 : + IXGBEVF_MAX_RSS_QUEUES; + break; + } + + rss = min_t(u32, max_qpairs, num_online_cpus()); + + adapter->q_caps.max_rxqs = rss; + adapter->q_caps.max_txqs = rss; +} + /** * ixgbevf_set_num_queues - Allocate queues for device, feature dependent * @adapter: board private structure to initialize @@ -2267,51 +2305,26 @@ static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter, **/ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; - unsigned int def_q = 0; - unsigned int num_tcs = 0; - int err; + ixgbevf_cfg_queue_caps(adapter); - /* Start with base case */ - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_xdp_queues = 0; + if (adapter->q_caps.num_tcs > 1) { + adapter->num_rx_queues = adapter->q_caps.max_rxqs; + adapter->num_tx_queues = adapter->q_caps.max_txqs; + adapter->num_xdp_queues = 0; - spin_lock_bh(&adapter->mbx_lock); - - /* fetch queue configuration from the PF */ - err = ixgbevf_get_queues(hw, &num_tcs, &def_q); - - spin_unlock_bh(&adapter->mbx_lock); - - if (err) return; - - /* we need as many queues as traffic classes */ - if (num_tcs > 1) { - adapter->num_rx_queues = num_tcs; } else { - u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES); - - switch (hw->api_version) { - case ixgbe_mbox_api_11: - case ixgbe_mbox_api_12: - case ixgbe_mbox_api_13: - case ixgbe_mbox_api_14: - case ixgbe_mbox_api_15: - case ixgbe_mbox_api_16: - case ixgbe_mbox_api_17: - if (adapter->xdp_prog && - hw->mac.max_tx_queues == rss) - rss = rss > 3 ? 2 : 1; - - adapter->num_rx_queues = rss; - adapter->num_tx_queues = rss; - adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0; - break; - default: - break; - } + u32 max_qpairs = min_t(u32, adapter->q_caps.max_rxqs, + adapter->q_caps.max_txqs); + + adapter->num_req_qpairs = + adapter->num_req_qpairs ? 
: adapter->q_caps.max_rxqs; + WARN_ON_ONCE(adapter->num_req_qpairs > max_qpairs); + adapter->num_rx_queues = + min_t(u32, adapter->num_req_qpairs, max_qpairs); + adapter->num_tx_queues = adapter->num_rx_queues; + adapter->num_xdp_queues = adapter->xdp_prog ? + adapter->num_rx_queues : 0; } } @@ -2623,7 +2636,7 @@ static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter) * @adapter: board private structure to initialize * **/ -static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) +int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) { int err; @@ -2664,7 +2677,7 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) * We go through and clear interrupt specific resources and reset the structure * to pre-load conditions **/ -static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) +void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { adapter->num_tx_queues = 0; adapter->num_xdp_queues = 0; @@ -4109,6 +4122,13 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EOPNOTSUPP; } + if (!adapter->xdp_prog && prog && + adapter->num_req_qpairs * 2 > adapter->q_caps.max_txqs) { + NL_SET_ERR_MSG_MOD(extack, + "Number of configured queue pairs should be half of the maximum or less to configure XDP"); + return -EINVAL; + } + old_prog = xchg(&adapter->xdp_prog, prog); /* If transitioning XDP modes reconfigure rings */ From 873a6f799ea0c1b24ee411da6fbaa2eeb781ce91 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 3 Apr 2026 09:04:07 +0200 Subject: [PATCH 02/13] libeth: add unprep callback for XDP Tx flushing Currently, XDP Tx bulk flush implicitly unlocks the used XDP Tx queue, which is assumed to be locked inside of the prep() callback. This does not permit the usage of any other locking mechanism, e.g. locking netdev queues as igc does to circumvent resource limitations. 
Given that locking is assumed to be done by the prep() callback, add an unprep() callback, which is meant to unconditionally undo the effects of prep(). Add a default unprep() which unlocks XDP TxQ in order to not disturb the existing driver code. Signed-off-by: Larysa Zaremba --- include/net/libeth/xdp.h | 37 +++++++++++++++++++++++++------------ include/net/libeth/xsk.h | 28 ++++++++++++++++++---------- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 2e2154ccecae61..c44b9260494059 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -521,7 +521,8 @@ libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, (*fill)(struct libeth_xdp_tx_frame frm, u32 i, const struct libeth_xdpsq *sq, u64 priv), void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, - const struct libeth_xdpsq *sq, u64 priv)) + const struct libeth_xdpsq *sq, u64 priv), + void (*unprep)(void *xdpsq, struct libeth_xdpsq *sq)) { struct libeth_xdpsq sq __uninitialized; u32 this, batched, off = 0; @@ -579,7 +580,7 @@ libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, *sq.xdp_tx += n; unlock: - libeth_xdpsq_unlock(sq.lock); + unprep(xdpsq, &sq); return n; } @@ -775,14 +776,15 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, const struct libeth_xdpsq *sq, u64 priv), void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, const struct libeth_xdpsq *sq, - u64 priv)) + u64 priv), + void (*unprep)(void *xdpsq, struct libeth_xdpsq *sq)) { u32 sent, drops; int err = 0; sent = libeth_xdp_tx_xmit_bulk(bq->bulk, bq->xdpsq, min(bq->count, LIBETH_XDP_TX_BULK), - false, 0, prep, fill, xmit); + false, 0, prep, fill, xmit, unprep); drops = bq->count - sent; if (unlikely(drops)) { @@ -807,9 +809,9 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, * Use via LIBETH_XDP_DEFINE_FLUSH_TX() to define an ``XDP_TX`` driver * callback. 
*/ -#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit) \ +#define libeth_xdp_tx_flush_bulk(bq, flags, prep, xmit, unprep) \ __libeth_xdp_tx_flush_bulk(bq, flags, prep, libeth_xdp_tx_fill_buf, \ - xmit) + xmit, unprep) /* .ndo_xdp_xmit() implementation */ @@ -1017,6 +1019,12 @@ libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, return desc; } +static inline void libeth_xdp_tx_unprep(void *xdpsq __always_unused, + struct libeth_xdpsq *sq) +{ + libeth_xdpsq_unlock(sq->lock); +} + /** * libeth_xdp_xmit_flush_bulk - wrapper to define flush of one XDP xmit bulk * @bq: bulk to flush @@ -1027,9 +1035,10 @@ libeth_xdp_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, * Use via LIBETH_XDP_DEFINE_FLUSH_XMIT() to define an XDP xmit driver * callback. */ -#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit) \ +#define libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit, unprep) \ __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_NDO, prep, \ - libeth_xdp_xmit_fill_buf, xmit) + libeth_xdp_xmit_fill_buf, xmit, \ + unprep) u32 libeth_xdp_xmit_return_bulk(const struct libeth_xdp_tx_frame *bq, u32 count, const struct net_device *dev); @@ -1611,12 +1620,13 @@ void name(struct work_struct *work) \ * @xmit: driver callback to write a HW Tx descriptor */ #define LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit) \ - __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xdp) + __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, libeth_xdp_tx_unprep, \ + xdp) -#define __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, pfx) \ +#define __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, unprep, pfx) \ bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \ { \ - return libeth_##pfx##_tx_flush_bulk(bq, flags, prep, xmit); \ + return libeth_##pfx##_tx_flush_bulk(bq, flags, prep, xmit, unprep); \ } /** @@ -1626,9 +1636,12 @@ bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \ * @xmit: driver callback to write a HW Tx descriptor */ #define LIBETH_XDP_DEFINE_FLUSH_XMIT(name, prep, xmit) \ + 
__LIBETH_XDP_DEFINE_FLUSH_XMIT(name, prep, xmit, libeth_xdp_tx_unprep) + +#define __LIBETH_XDP_DEFINE_FLUSH_XMIT(name, prep, xmit, unprep) \ bool name(struct libeth_xdp_tx_bulk *bq, u32 flags) \ { \ - return libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit); \ + return libeth_xdp_xmit_flush_bulk(bq, flags, prep, xmit, unprep); \ } /** diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 82b5d21aae8784..a531a8d5c7911e 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -146,9 +146,10 @@ libeth_xsk_tx_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, * Use via LIBETH_XSK_DEFINE_FLUSH_TX() to define an XSk ``XDP_TX`` driver * callback. */ -#define libeth_xsk_tx_flush_bulk(bq, flags, prep, xmit) \ +#define libeth_xsk_tx_flush_bulk(bq, flags, prep, xmit, unprep) \ __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_XSK, prep, \ - libeth_xsk_tx_fill_buf, xmit) + libeth_xsk_tx_fill_buf, xmit, \ + unprep) /* XSk TMO */ @@ -282,12 +283,13 @@ libeth_xsk_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, * Return: false if @budget was exhausted, true otherwise. 
*/ static __always_inline bool -libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, - const struct xsk_tx_metadata_ops *tmo, - u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), - void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, - const struct libeth_xdpsq *sq, u64 priv), - void (*finalize)(void *xdpsq, bool sent, bool flush)) +__libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, + const struct xsk_tx_metadata_ops *tmo, + u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), + void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv), + void (*finalize)(void *xdpsq, bool sent, bool flush), + void (*unprep)(void *xdpsq, struct libeth_xdpsq *sq)) { const struct libeth_xdp_tx_frame *bulk; bool wake; @@ -302,7 +304,8 @@ libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, libeth_xdp_tx_xmit_bulk(bulk, xdpsq, n, true, libeth_xdp_ptr_to_priv(tmo), prep, - libeth_xsk_xmit_fill_buf, xmit); + libeth_xsk_xmit_fill_buf, xmit, + unprep); finalize(xdpsq, n, true); if (wake) @@ -311,6 +314,10 @@ libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, return n < budget; } +#define libeth_xsk_xmit_do_bulk(pool, xdpsq, budget, tmo, prep, xmit, finalize)\ + __libeth_xsk_xmit_do_bulk(pool, xdpsq, budget, tmo, prep, xmit, \ + finalize, libeth_xdp_tx_unprep) + /* Rx polling path */ /** @@ -546,7 +553,8 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @xmit: driver callback to write a HW Tx descriptor */ #define LIBETH_XSK_DEFINE_FLUSH_TX(name, prep, xmit) \ - __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, xsk) + __LIBETH_XDP_DEFINE_FLUSH_TX(name, prep, xmit, libeth_xdp_tx_unprep, \ + xsk) /** * LIBETH_XSK_DEFINE_RUN_PROG - define a driver XDP program run function From a894932d2a5c52fb0bad4c567e27cc8c4bd3a356 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 8 Apr 2026 18:19:09 +0200 Subject: [PATCH 03/13] libeth: make sqe->priv set size 
While having a platform-dependent priv size improves 32-bit performance, it complicates usage of priv as a container. Make priv a 64-bit int, so structures can be packed there more conveniently. Signed-off-by: Larysa Zaremba --- include/net/libeth/tx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h index c3db5c6f16410c..f3edefe2411da4 100644 --- a/include/net/libeth/tx.h +++ b/include/net/libeth/tx.h @@ -74,7 +74,7 @@ struct libeth_sqe { u32 packets; u32 bytes; - unsigned long priv; + u64 priv; } __aligned_largest; /** From 914cfc237ef84cd91fe6de1d796b0b415516d157 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 19 May 2026 13:05:22 +0200 Subject: [PATCH 04/13] libeth: add option to force XDP queue sharing when initializing bulk Currently, XDP queue sharing, and hence modulo operation in the bulk initialization, are dependent on initialized libeth XDP SQ locks. But when using Tx queues for XDP, we do not use libeth locking. So add a static variant of a buff initialization function, which will perform modulo operation unconditionally. Signed-off-by: Larysa Zaremba --- include/net/libeth/xdp.h | 25 ++++++++++++++++++------- include/net/libeth/xsk.h | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index c44b9260494059..92bc82f5dc05e9 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -132,17 +132,18 @@ static inline bool libeth_xdpsq_shared(u32 num) /** * libeth_xdpsq_id - get XDPSQ index corresponding to this CPU * @num: number of active XDPSQs + * @force_share: always share queues * * Helper for libeth_xdp routines, do not use in drivers directly. * * Return: XDPSQ index needs to be used on this CPU. 
*/ -static inline u32 libeth_xdpsq_id(u32 num) +static inline u32 libeth_xdpsq_id(u32 num, bool force_share) { u32 ret = raw_smp_processor_id(); - if (static_branch_unlikely(&libeth_xdpsq_share) && - libeth_xdpsq_shared(num)) + if ((static_branch_unlikely(&libeth_xdpsq_share) && + libeth_xdpsq_shared(num)) || force_share) ret %= num; return ret; @@ -823,7 +824,12 @@ __libeth_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags, * @num: number of active XDPSQs (the above array length) */ #define libeth_xdp_xmit_init_bulk(bq, dev, xdpsqs, num) \ - __libeth_xdp_xmit_init_bulk(bq, dev, (xdpsqs)[libeth_xdpsq_id(num)]) + __libeth_xdp_xmit_init_bulk(bq, dev, \ + (xdpsqs)[libeth_xdpsq_id(num, false)]) + +#define libeth_xdp_xmit_init_bulk_shared(bq, dev, xdpsqs, num) \ + __libeth_xdp_xmit_init_bulk(bq, dev, \ + (xdpsqs)[libeth_xdpsq_id(num, true)]) static inline void __libeth_xdp_xmit_init_bulk(struct libeth_xdp_tx_bulk *bq, struct net_device *dev, @@ -1148,10 +1154,15 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * Do not use for XSk, it has its own optimized helper. 
*/ #define libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num) \ - __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, false, \ + __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) + +#define libeth_xdp_tx_init_bulk_shared(bq, prog, dev, xdpsqs, num) \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, false, true, \ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) -#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, xsk, ub, un) do { \ +#define __libeth_xdp_tx_init_bulk(bq, pr, d, xdpsqs, num, xsk, fs, ub, un) \ +do { \ typeof(bq) ub = (bq); \ u32 un = (num); \ \ @@ -1160,7 +1171,7 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, if (un || (xsk)) { \ ub->prog = rcu_dereference(pr); \ ub->dev = (d); \ - ub->xdpsq = (xdpsqs)[libeth_xdpsq_id(un)]; \ + ub->xdpsq = (xdpsqs)[libeth_xdpsq_id(un, fs)]; \ } else { \ ub->prog = NULL; \ } \ diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index a531a8d5c7911e..644bfa02994ab0 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -334,7 +334,7 @@ __libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, * when hitting this path. */ #define libeth_xsk_tx_init_bulk(bq, prog, dev, xdpsqs, num) \ - __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, false, \ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) struct libeth_xdp_buff *libeth_xsk_buff_add_frag(struct libeth_xdp_buff *head, From 03390f886fae9bae29a4c9a78c884b2f0b1b19c4 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 8 Apr 2026 18:20:03 +0200 Subject: [PATCH 05/13] ixgbevf: use libeth_tx for sending skbs libeth_tx provides structures and helpers for Tx buffer management. Using it in ixgbevf will allow seamless integration with libeth_xdp and reduce code volume. Use libeth_tx in Tx buffer management code. 
Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 8 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 27 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 329 +++++++----------- .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 22 +- 4 files changed, 145 insertions(+), 241 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index fce35924ff8b51..706e123fa92284 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -450,11 +450,11 @@ static const struct xfrmdev_ops ixgbevf_xfrmdev_ops = { * @first: current data packet * @itd: ipsec Tx data for later use in building context descriptor **/ -int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first, +int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, struct libeth_sqe *first, struct ixgbevf_ipsec_tx_data *itd) { struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); + struct ixgbevf_skb_sqe_priv *sqe_priv = (void *)&first->priv; struct ixgbevf_ipsec *ipsec = adapter->ipsec; struct xfrm_state *xs; struct sec_path *sp; @@ -491,12 +491,12 @@ int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, itd->pfsa = tsa->pfsa - IXGBE_IPSEC_BASE_TX_INDEX; - first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CSUM; + sqe_priv->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CSUM; if (xs->id.proto == IPPROTO_ESP) { itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | IXGBE_ADVTXD_TUCMD_L4T_TCP; - if (first->protocol == htons(ETH_P_IP)) + if (sqe_priv->protocol == htons(ETH_P_IP)) itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4; /* The actual trailer length is authlen (16 bytes) plus diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3c3ba709e52ba6..acb70cab25cd71 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -11,7 +11,7 @@ #include #include 
#include -#include +#include #include #include "vf.h" @@ -24,24 +24,12 @@ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -/* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer - */ -struct ixgbevf_tx_buffer { - union ixgbe_adv_tx_desc *next_to_watch; - unsigned long time_stamp; - union { - struct sk_buff *skb; - /* XDP uses address ptr on irq_clean */ - void *data; - }; - unsigned int bytecount; - unsigned short gso_segs; - __be16 protocol; - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); +struct ixgbevf_skb_sqe_priv { u32 tx_flags; + __be16 protocol; }; +static_assert(sizeof(struct ixgbevf_skb_sqe_priv) <= + sizeof(typeof_member(struct libeth_sqe, priv))); struct ixgbevf_stats { u64 packets; @@ -103,7 +91,7 @@ struct ixgbevf_ring { union { struct libeth_fqe *rx_fqes; struct libeth_xdp_buff **xsk_fqes; - struct ixgbevf_tx_buffer *tx_buffer_info; + struct libeth_sqe *tx_sqes; struct libeth_sqe *xdp_sqes; }; struct libeth_xdpsq_lock xdpq_lock; @@ -456,8 +444,7 @@ void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter); void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb); -int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first, +int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, struct libeth_sqe *first, struct ixgbevf_ipsec_tx_data *itd); #else static inline void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index b17c53a00b640e..31d128f352b496 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -270,108 +270,55 @@ static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int __always_ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, 
struct ixgbevf_ring *tx_ring, int napi_budget) { + u16 budget = tx_ring->count / 2, to_clean, ntc = tx_ring->next_to_clean; struct ixgbevf_adapter *adapter = q_vector->adapter; - struct ixgbevf_tx_buffer *tx_buffer; - union ixgbe_adv_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0; - unsigned int budget = tx_ring->count / 2; - unsigned int i = tx_ring->next_to_clean; + struct libeth_sq_napi_stats stats = { }; + unsigned int total_ipsec = 0; + struct libeth_cq_pp cq = { + .ss = &stats, + .dev = tx_ring->dev, + .napi = true, + }; if (test_bit(__IXGBEVF_DOWN, &adapter->state)) return true; - tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBEVF_TX_DESC(tx_ring, i); - i -= tx_ring->count; - - do { - union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; - - /* if next_to_watch is not set then there is no work pending */ - if (!eop_desc) - break; - - /* prevent any other reads prior to eop_desc */ - smp_rmb(); - - /* if DD is not set pending work has not been completed */ - if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) - break; + to_clean = ixgbevf_tx_get_num_sent(tx_ring, budget); + budget = budget > to_clean ? 
budget - to_clean : 0; - /* clear next_to_watch to prevent false hangs */ - tx_buffer->next_to_watch = NULL; + for (int i = 0; i < to_clean; i++) { + struct ixgbevf_skb_sqe_priv *priv; + struct libeth_sqe *sqe; - /* update the statistics for this packet */ - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC) + sqe = &tx_ring->tx_sqes[ntc]; + priv = (void *)&sqe->priv; + if (priv->tx_flags & IXGBE_TX_FLAGS_IPSEC) total_ipsec++; - /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - - /* clear tx_buffer data */ - dma_unmap_len_set(tx_buffer, len, 0); - - /* unmap remaining buffers */ - while (tx_desc != eop_desc) { - tx_buffer++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); - } - - /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - } - } - - /* move us one more past the eop_desc for start of next pkt */ - tx_buffer++; - tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; - tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); - } + libeth_tx_complete(sqe, &cq); - /* issue prefetch for next Tx descriptor */ - prefetch(tx_desc); + if (unlikely(++ntc == tx_ring->count)) + ntc = 0; + } - /* update budget accounting */ - budget--; - } while (likely(budget)); + smp_wmb(); - i += tx_ring->count; - tx_ring->next_to_clean = i; + tx_ring->next_to_clean = ntc; u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; + tx_ring->stats.bytes += stats.bytes; + 
tx_ring->stats.packets += stats.packets; u64_stats_update_end(&tx_ring->syncp); - q_vector->tx.total_bytes += total_bytes; - q_vector->tx.total_packets += total_packets; + q_vector->tx.total_bytes += stats.bytes; + q_vector->tx.total_packets += stats.packets; adapter->tx_ipsec += total_ipsec; if (check_for_tx_hang(tx_ring) && ixgbevf_check_tx_hang(tx_ring)) { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_tx_desc *eop_desc; + u32 rs_idx; - eop_desc = tx_ring->tx_buffer_info[i].next_to_watch; + rs_idx = tx_ring->tx_sqes[ntc].rs_idx; + eop_desc = rs_idx ? IXGBEVF_TX_DESC(tx_ring, rs_idx - 1) : NULL; pr_err("Detected Tx Unit Hang%s\n" " Tx Queue <%d>\n" @@ -380,16 +327,13 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, " next_to_clean <%x>\n" "tx_buffer_info[next_to_clean]\n" " next_to_watch <%p>\n" - " eop_desc->wb.status <%x>\n" - " time_stamp <%lx>\n" - " jiffies <%lx>\n", + " eop_desc->wb.status <%x>\n", ring_is_xdp(tx_ring) ? " XDP" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)), - tx_ring->next_to_use, i, - eop_desc, (eop_desc ? eop_desc->wb.status : 0), - tx_ring->tx_buffer_info[i].time_stamp, jiffies); + tx_ring->next_to_use, ntc, + eop_desc, (eop_desc ? eop_desc->wb.status : 0)); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); @@ -401,7 +345,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, } #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) - if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && + if (unlikely(stats.packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. 
@@ -1212,15 +1156,10 @@ void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - if (!ring_is_xdp(ring)) { - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbevf_tx_buffer) * ring->count); - } else { - memset(ring->xdp_sqes, 0, - sizeof(struct libeth_sqe) * ring->count); + memset(ring->xdp_sqes, 0, sizeof(struct libeth_sqe) * ring->count); + if (ring_is_xdp(ring)) libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, num_possible_cpus() > adapter->num_xdp_queues); - } ring->xsk_pool = ixgbevf_xsk_pool_from_q(ring); if (ring_is_xdp(ring) && ring->xsk_pool) @@ -2040,51 +1979,22 @@ void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) **/ void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { - u16 i = tx_ring->next_to_clean; - struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; - - while (i != tx_ring->next_to_use) { - union ixgbe_adv_tx_desc *eop_desc, *tx_desc; - - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); + struct libeth_sq_napi_stats stats = { }; + u16 ntc = tx_ring->next_to_clean; + struct libeth_cq_pp cq = { + .dev = tx_ring->dev, + .ss = &stats, + }; - /* check for eop_desc to determine the end of the packet */ - eop_desc = tx_buffer->next_to_watch; - tx_desc = IXGBEVF_TX_DESC(tx_ring, i); + for (int i = 0; i < tx_ring->pending; i++) { + struct libeth_sqe *sqe; - /* unmap remaining buffers */ - while (tx_desc != eop_desc) { - tx_buffer++; - tx_desc++; - i++; - if (unlikely(i == tx_ring->count)) { - i = 0; - tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); - } + sqe = &tx_ring->tx_sqes[ntc]; - /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - 
DMA_TO_DEVICE); - } + libeth_tx_complete(sqe, &cq); - /* move us one more past the eop_desc for start of next pkt */ - tx_buffer++; - i++; - if (unlikely(i == tx_ring->count)) { - i = 0; - tx_buffer = tx_ring->tx_buffer_info; - } + if (unlikely(++ntc == tx_ring->count)) + ntc = 0; } /* reset next_to_use and next_to_clean */ @@ -3043,8 +2953,8 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) else ixgbevf_clean_xdp_ring(tx_ring); - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; + vfree(tx_ring->tx_sqes); + tx_ring->tx_sqes = NULL; /* if not set, then don't free */ if (!tx_ring->desc) @@ -3085,11 +2995,10 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; - size = (!ring_is_xdp(tx_ring) ? sizeof(struct ixgbevf_tx_buffer) : - sizeof(struct libeth_sqe)) * tx_ring->count; + size = sizeof(struct libeth_sqe) * tx_ring->count; - tx_ring->tx_buffer_info = vmalloc(size); - if (!tx_ring->tx_buffer_info) + tx_ring->tx_sqes = vmalloc(size); + if (!tx_ring->tx_sqes) goto err; u64_stats_init(&tx_ring->syncp); @@ -3106,8 +3015,8 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) return 0; err: - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; + vfree(tx_ring->tx_sqes); + tx_ring->tx_sqes = NULL; hw_dbg(&adapter->hw, "Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; } @@ -3427,11 +3336,10 @@ static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring, context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } -static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first, - u8 *hdr_len, - struct ixgbevf_ipsec_tx_data *itd) +static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, struct libeth_sqe *first, + u8 *hdr_len, struct ixgbevf_ipsec_tx_data *itd) { + struct ixgbevf_skb_sqe_priv *priv = (void *)&first->priv; u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; struct sk_buff 
*skb = first->skb; union { @@ -3457,7 +3365,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, if (err < 0) return err; - if (eth_p_mpls(first->protocol)) + if (eth_p_mpls(priv->protocol)) ip.hdr = skb_inner_network_header(skb); else ip.hdr = skb_network_header(skb); @@ -3482,12 +3390,12 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; ip.v4->tot_len = 0; - first->tx_flags |= IXGBE_TX_FLAGS_TSO | + priv->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM | IXGBE_TX_FLAGS_IPV4; } else { ip.v6->payload_len = 0; - first->tx_flags |= IXGBE_TX_FLAGS_TSO | + priv->tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; } @@ -3502,8 +3410,8 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* update gso size and bytecount with header size */ - first->gso_segs = skb_shinfo(skb)->gso_segs; - first->bytecount += (first->gso_segs - 1) * *hdr_len; + first->packets = skb_shinfo(skb)->gso_segs; + first->bytes += (first->packets - 1) * *hdr_len; /* mss_l4len_id: use 1 as index for TSO */ mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT; @@ -3516,7 +3424,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ vlan_macip_lens = l4.hdr - ip.hdr; vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; + vlan_macip_lens |= priv->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, mss_l4len_idx); @@ -3525,9 +3433,10 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, } static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first, + struct libeth_sqe *first, struct ixgbevf_ipsec_tx_data *itd) { + struct ixgbevf_skb_sqe_priv *priv = (void *)&first->priv; struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; u32 fceof_saidx 
= 0; @@ -3554,17 +3463,17 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, goto no_csum; } - if (first->protocol == htons(ETH_P_IP)) + if (priv->protocol == htons(ETH_P_IP)) type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; /* update TX checksum flag */ - first->tx_flags |= IXGBE_TX_FLAGS_CSUM; + priv->tx_flags |= IXGBE_TX_FLAGS_CSUM; vlan_macip_lens = skb_checksum_start_offset(skb) - skb_network_offset(skb); no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; + vlan_macip_lens |= priv->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; fceof_saidx |= itd->pfsa; type_tucmd |= itd->flags | itd->trailer_len; @@ -3621,20 +3530,21 @@ static void ixgbevf_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, } static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, - struct ixgbevf_tx_buffer *first, + struct libeth_sqe *first, const u8 hdr_len) { + struct ixgbevf_skb_sqe_priv *priv = (void *)&first->priv; struct sk_buff *skb = first->skb; - struct ixgbevf_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; + u32 tx_flags = priv->tx_flags; + struct libeth_sqe *used_sqe; skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; - u32 tx_flags = first->tx_flags; __le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags); - u16 i = tx_ring->next_to_use; + u16 ntu = tx_ring->next_to_use, done = 0; - tx_desc = IXGBEVF_TX_DESC(tx_ring, i); + tx_desc = IXGBEVF_TX_DESC(tx_ring, ntu); ixgbevf_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len); @@ -3643,15 +3553,16 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - tx_buffer = first; - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { - if (dma_mapping_error(tx_ring->dev, dma)) + + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) goto dma_error; /* record length, and DMA address */ - dma_unmap_len_set(tx_buffer, len, size); - 
dma_unmap_addr_set(tx_buffer, dma, dma); + used_sqe = &tx_ring->tx_sqes[ntu]; + dma_unmap_len_set(used_sqe, len, size); + dma_unmap_addr_set(used_sqe, dma, dma); + used_sqe->type = LIBETH_SQE_FRAG; tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -3659,11 +3570,12 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); - i++; + ntu++; + done++; tx_desc++; - if (i == tx_ring->count) { + if (ntu == tx_ring->count) { tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); - i = 0; + ntu = 0; } tx_desc->read.olinfo_status = 0; @@ -3678,11 +3590,12 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); - i++; + ntu++; + done++; tx_desc++; - if (i == tx_ring->count) { + if (ntu == tx_ring->count) { tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); - i = 0; + ntu = 0; } tx_desc->read.olinfo_status = 0; @@ -3691,18 +3604,14 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, DMA_TO_DEVICE); - - tx_buffer = &tx_ring->tx_buffer_info[i]; } /* write last descriptor with RS and EOP bits */ cmd_type |= cpu_to_le32(size) | cpu_to_le32(IXGBE_TXD_CMD); tx_desc->read.cmd_type_len = cmd_type; - /* set the timestamp */ - first->time_stamp = jiffies; - skb_tx_timestamp(skb); + first->type = LIBETH_SQE_SKB; /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. 
(Only applicable for weak-ordered @@ -3714,47 +3623,49 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, wmb(); /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; + first->rs_idx = ntu + 1; - i++; - if (i == tx_ring->count) - i = 0; + ntu++; + done++; + if (ntu == tx_ring->count) + ntu = 0; - tx_ring->next_to_use = i; + tx_ring->next_to_use = ntu; + tx_ring->pending += done; /* notify HW of packet */ - ixgbevf_write_tail(tx_ring, i); + ixgbevf_write_tail(tx_ring, ntu); return; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); - tx_buffer = &tx_ring->tx_buffer_info[i]; + used_sqe = &tx_ring->tx_sqes[ntu]; /* clear dma mappings for failed tx_buffer_info map */ - while (tx_buffer != first) { - if (dma_unmap_len(tx_buffer, len)) + while (used_sqe != first) { + if (dma_unmap_len(used_sqe, len)) dma_unmap_page(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), + dma_unmap_addr(used_sqe, dma), + dma_unmap_len(used_sqe, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); + dma_unmap_len_set(used_sqe, len, 0); - if (i-- == 0) - i += tx_ring->count; - tx_buffer = &tx_ring->tx_buffer_info[i]; + if (ntu-- == 0) + ntu += tx_ring->count; + used_sqe = &tx_ring->tx_sqes[ntu]; } - if (dma_unmap_len(tx_buffer, len)) + if (dma_unmap_len(used_sqe, len)) dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), + dma_unmap_addr(used_sqe, dma), + dma_unmap_len(used_sqe, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); + dma_unmap_len_set(used_sqe, len, 0); - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; + dev_kfree_skb_any(used_sqe->skb); + used_sqe->skb = NULL; - tx_ring->next_to_use = i; + tx_ring->next_to_use = ntu; } static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) @@ -3789,7 +3700,8 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) static int 
ixgbevf_xmit_frame_ring(struct sk_buff *skb, struct ixgbevf_ring *tx_ring) { - struct ixgbevf_tx_buffer *first; + struct ixgbevf_skb_sqe_priv *priv; + struct libeth_sqe *first; int tso; u32 tx_flags = 0; u16 count = TXD_USE_COUNT(skb_headlen(skb)); @@ -3826,10 +3738,11 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, } /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first = &tx_ring->tx_sqes[tx_ring->next_to_use]; + priv = (void *)&first->priv; first->skb = skb; - first->bytecount = skb->len; - first->gso_segs = 1; + first->bytes = skb->len; + first->packets = 1; if (skb_vlan_tag_present(skb)) { tx_flags |= skb_vlan_tag_get(skb); @@ -3838,8 +3751,8 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, } /* record initial flags and protocol */ - first->tx_flags = tx_flags; - first->protocol = vlan_get_protocol(skb); + priv->tx_flags = tx_flags; + priv->protocol = vlan_get_protocol(skb); #ifdef CONFIG_IXGBEVF_IPSEC if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index ad1e09ae6aa1c3..f76850185a2d88 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -8,29 +8,33 @@ #include "ixgbevf.h" -static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) +static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *tx_ring, + u16 budget) { - u16 ntc = xdp_ring->next_to_clean; + u16 ntc = tx_ring->next_to_clean; u16 to_clean = 0; - while (likely(to_clean < xdp_ring->pending)) { - u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + while (likely(to_clean < tx_ring->pending) && + likely(to_clean < budget)) { + u32 idx = tx_ring->xdp_sqes[ntc].rs_idx; union ixgbe_adv_tx_desc *rs_desc; if (!idx--) break; - rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); + smp_rmb(); + + 
rs_desc = IXGBEVF_TX_DESC(tx_ring, idx); if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; - xdp_ring->xdp_sqes[ntc].rs_idx = 0; + tx_ring->xdp_sqes[ntc].rs_idx = 0; to_clean += - (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + (idx >= ntc ? idx : idx + tx_ring->count) - ntc + 1; - ntc = (idx + 1 == xdp_ring->count) ? 0 : idx + 1; + ntc = (idx + 1 == tx_ring->count) ? 0 : idx + 1; } return to_clean; @@ -74,7 +78,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { - u16 to_clean = ixgbevf_tx_get_num_sent(xdpsq); + u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring, xdp_ring->count); if (likely(to_clean)) ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); From ac6e350a8a848c0e6f1956c8554b31a72efce710 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 1 Jul 2026 14:00:08 +0200 Subject: [PATCH 06/13] fixup! ixgbevf: use libeth_tx for sending skbs --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 31d128f352b496..0fe3cc12b954e5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3542,7 +3542,7 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, dma_addr_t dma; unsigned int data_len, size; __le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags); - u16 ntu = tx_ring->next_to_use, done = 0; + u16 ntu = tx_ring->next_to_use; tx_desc = IXGBEVF_TX_DESC(tx_ring, ntu); @@ -3571,7 +3571,6 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); ntu++; - done++; tx_desc++; if (ntu == tx_ring->count) { tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); @@ -3591,7 +3590,6 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, 
tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); ntu++; - done++; tx_desc++; if (ntu == tx_ring->count) { tx_desc = IXGBEVF_TX_DESC(tx_ring, 0); @@ -3626,12 +3624,10 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, first->rs_idx = ntu + 1; ntu++; - done++; if (ntu == tx_ring->count) ntu = 0; tx_ring->next_to_use = ntu; - tx_ring->pending += done; /* notify HW of packet */ ixgbevf_write_tail(tx_ring, ntu); From 724dd0aa72015c758a4236a843ef1eaa4928c37d Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 12 May 2026 16:04:46 +0200 Subject: [PATCH 07/13] fixup! ixgbevf: use libeth_tx for sending skbs --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0fe3cc12b954e5..30209b876968ad 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -283,7 +283,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, if (test_bit(__IXGBEVF_DOWN, &adapter->state)) return true; - to_clean = ixgbevf_tx_get_num_sent(tx_ring, budget); + to_clean = ixgbevf_desc_used(tx_ring); + to_clean = ixgbevf_tx_get_num_sent(tx_ring, min_t(u16, budget, to_clean)); budget = budget > to_clean ? 
budget - to_clean : 0; for (int i = 0; i < to_clean; i++) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index f76850185a2d88..94354a9c40dc6f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -14,8 +14,7 @@ static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *tx_ring, u16 ntc = tx_ring->next_to_clean; u16 to_clean = 0; - while (likely(to_clean < tx_ring->pending) && - likely(to_clean < budget)) { + while (likely(to_clean < budget)) { u32 idx = tx_ring->xdp_sqes[ntc].rs_idx; union ixgbe_adv_tx_desc *rs_desc; @@ -78,7 +77,8 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { - u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring, xdp_ring->count); + u16 to_clean = + ixgbevf_tx_get_num_sent(xdp_ring, xdp_ring->pending); if (likely(to_clean)) ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); From 31c1176d2ac233aaa34e72c8700704a27e36b259 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 1 Jul 2026 13:53:06 +0200 Subject: [PATCH 08/13] ixgbevf: support XDP and skb transmission from the same queue VFs supported by the ixgbevf driver can have a pretty limited number of queues, e.g. only 4. A user may wish to use all 4 netdev queues and XDP simultaneously. Such a feature is already provided by the igc and igb drivers. Based on the igb and igc approach, add a shared (between XDP and skb Tx) queue mode, which is used if the currently configured number of channels is more than half of the number of available queue pairs.
Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 8 ++ .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 97 +++++++++++++------ .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 92 +++++++++++++++++- 3 files changed, 168 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index acb70cab25cd71..ebdc3304e768eb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -233,6 +233,14 @@ static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring) return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; } +static inline u16 ixgbevf_desc_used(struct ixgbevf_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntu >= ntc) ? 0 : ring->count) + ntu - ntc; +} + static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value) { writel(value, ring->tail); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 30209b876968ad..367860107661ff 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -273,16 +273,20 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, u16 budget = tx_ring->count / 2, to_clean, ntc = tx_ring->next_to_clean; struct ixgbevf_adapter *adapter = q_vector->adapter; struct libeth_sq_napi_stats stats = { }; + struct xdp_frame_bulk xdp_bulk; unsigned int total_ipsec = 0; struct libeth_cq_pp cq = { .ss = &stats, .dev = tx_ring->dev, .napi = true, + .bq = &xdp_bulk, }; if (test_bit(__IXGBEVF_DOWN, &adapter->state)) return true; + xdp_frame_bulk_init(&xdp_bulk); + to_clean = ixgbevf_desc_used(tx_ring); to_clean = ixgbevf_tx_get_num_sent(tx_ring, min_t(u16, budget, to_clean)); budget = budget > to_clean ? 
budget - to_clean : 0; @@ -296,11 +300,12 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, if (priv->tx_flags & IXGBE_TX_FLAGS_IPSEC) total_ipsec++; - libeth_tx_complete(sqe, &cq); + libeth_tx_complete_any(sqe, &cq); if (unlikely(++ntc == tx_ring->count)) ntc = 0; } + xdp_flush_frame_bulk(&xdp_bulk); smp_wmb(); @@ -491,11 +496,30 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, LIBETH_XDP_DEFINE_START(); LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, ixgbevf_xdp_xmit_desc); +__LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx_shared, + ixgbevf_prep_tx_sq, ixgbevf_xdp_xmit_desc, + ixgbevf_xdp_tx_unprep, xdp); LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit, ixgbevf_prep_xdp_sq, ixgbevf_xdp_xmit_desc); -LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_tx); +__LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit_shared, + ixgbevf_prep_tx_sq, ixgbevf_xdp_xmit_desc, + ixgbevf_xdp_tx_unprep); +LIBETH_XDP_DEFINE_END(); + +static bool ixgbevf_xdp_flush_common(struct libeth_xdp_tx_bulk *bq, u32 flags) +{ + const struct ixgbevf_ring *ring = bq->xdpsq; + + if (test_bit(__IXGBEVF_TX_XDP_RING, &ring->state)) + return ixgbevf_xdp_flush_tx(bq, flags); + else + return ixgbevf_xdp_flush_tx_shared(bq, flags); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_common); LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, - ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); + ixgbevf_xdp_flush_common, ixgbevf_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); static u32 ixgbevf_rx_hsplit_wa(const struct libeth_fqe *hdr, @@ -535,9 +559,15 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, LIBETH_XDP_ONSTACK_BUFF(xdp); libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); - libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, - adapter->netdev, adapter->xdp_ring, - 
adapter->num_xdp_queues); + if (adapter->num_xdp_queues) + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + else + libeth_xdp_tx_init_bulk_shared(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, + adapter->tx_ring, + adapter->num_tx_queues); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -651,13 +681,26 @@ static int ixgbevf_xdp_xmit(struct net_device *dev, int n, if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) return -ENETDOWN; - if (unlikely(!adapter->num_xdp_queues)) + if (unlikely(!READ_ONCE(adapter->xdp_prog))) return -ENXIO; - return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, adapter->xdp_ring, - adapter->num_xdp_queues, - ixgbevf_xdp_flush_xmit, - ixgbevf_xdp_rs_and_bump); + if (adapter->num_xdp_queues) + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, + adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbevf_xdp_flush_xmit, + ixgbevf_xdp_rs_and_bump); + else { + LIBETH_XDP_ONSTACK_BULK(xdp_bulk); + + libeth_xdp_xmit_init_bulk_shared(&xdp_bulk, dev, + adapter->tx_ring, + adapter->num_tx_queues); + + return __libeth_xdp_xmit_do_bulk(&xdp_bulk, frames, n, flags, + ixgbevf_xdp_flush_xmit_shared, + ixgbevf_xdp_rs_and_bump); + } } /** @@ -1982,21 +2025,28 @@ void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { struct libeth_sq_napi_stats stats = { }; u16 ntc = tx_ring->next_to_clean; + struct xdp_frame_bulk xdp_bulk; struct libeth_cq_pp cq = { .dev = tx_ring->dev, .ss = &stats, + .bq = &xdp_bulk, }; + xdp_frame_bulk_init(&xdp_bulk); + + tx_ring->pending = ixgbevf_desc_used(tx_ring); + for (int i = 0; i < tx_ring->pending; i++) { struct libeth_sqe *sqe; sqe = &tx_ring->tx_sqes[ntc]; - libeth_tx_complete(sqe, &cq); + libeth_tx_complete_any(sqe, &cq); if (unlikely(++ntc == tx_ring->count)) ntc = 0; } + xdp_flush_frame_bulk(&xdp_bulk); /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; @@ -2187,13 +2237,9 @@ static 
void ixgbevf_cfg_queue_caps(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_17: max_qpairs = min_t(u32, hw->mac.max_rx_queues, hw->mac.max_tx_queues); - if (adapter->xdp_prog) - max_qpairs = min_t(u32, max_qpairs, - hw->mac.max_tx_queues / 2); break; default: - max_qpairs = adapter->xdp_prog ? IXGBEVF_MAX_RSS_QUEUES / 2 : - IXGBEVF_MAX_RSS_QUEUES; + max_qpairs = IXGBEVF_MAX_RSS_QUEUES; break; } @@ -2234,8 +2280,11 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) adapter->num_rx_queues = min_t(u32, adapter->num_req_qpairs, max_qpairs); adapter->num_tx_queues = adapter->num_rx_queues; - adapter->num_xdp_queues = adapter->xdp_prog ? - adapter->num_rx_queues : 0; + if (adapter->num_rx_queues + adapter->num_tx_queues <= + adapter->q_caps.max_txqs && adapter->xdp_prog) + adapter->num_xdp_queues = adapter->num_rx_queues; + else + adapter->num_xdp_queues = 0; } } @@ -3622,13 +3671,14 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, wmb(); /* set next_to_watch value indicating a packet is present */ - first->rs_idx = ntu + 1; + tx_ring->xdp_sqes[tx_ring->cached_ntu].rs_idx = ntu + 1; ntu++; if (ntu == tx_ring->count) ntu = 0; tx_ring->next_to_use = ntu; + tx_ring->cached_ntu = ntu; /* notify HW of packet */ ixgbevf_write_tail(tx_ring, ntu); @@ -4032,13 +4082,6 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EOPNOTSUPP; } - if (!adapter->xdp_prog && prog && - adapter->num_req_qpairs * 2 > adapter->q_caps.max_txqs) { - NL_SET_ERR_MSG_MOD(extack, - "Number of configured queue pairs should be half of the maximum or less to configure XDP"); - return -EINVAL; - } - old_prog = xchg(&adapter->xdp_prog, prog); /* If transitioning XDP modes reconfigure rings */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index 94354a9c40dc6f..d3dd6cd9f48dcb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -121,13 +121,98 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) return ixgbevf_desc_unused(xdp_ring); } +static inline struct netdev_queue * +ixgbevf_xdp_tx_get_nq(struct ixgbevf_ring *tx_ring) +{ + struct netdev_queue *nq; + + nq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); + __netif_tx_lock(nq, smp_processor_id()); + + return nq; +} + +static inline void ixgbevf_xdp_tx_put_nq(struct ixgbevf_ring *tx_ring) +{ + struct netdev_queue *nq; + + nq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); + __netif_tx_unlock(nq); +} + +static inline u32 ixgbevf_prep_tx_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbe_adv_tx_context_desc *context_desc; + struct ixgbevf_ring *tx_ring = xdpsq; + struct netdev_queue *nq; + u32 num_unused, ntu; + + /* Serialization of producers in .ndo_start_xmit(), + * .ndo_xdp_xmit() and XDP_TX + */ + nq = ixgbevf_xdp_tx_get_nq(tx_ring); + + /* We need at least 1 additional descriptor for context */ + num_unused = ixgbevf_desc_unused(tx_ring); + if (num_unused < 2) + return 0; + + /* Inform the stack that queue is transmitting to avoid Tx timeout */ + txq_trans_cond_update(nq); + + /* Shared TxQ cleaning was done beforehand */ + + /* Instead of sending a context descriptor once for an XDP-only ring, + * do this before sending each bulk + */ + ntu = tx_ring->next_to_use; + context_desc = IXGBEVF_TX_CTXTDESC(tx_ring, ntu); + tx_ring->xdp_sqes[ntu].type = LIBETH_SQE_CTX; + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + ntu++; + num_unused--; + ntu = ntu == tx_ring->count ? 
0 : ntu; + tx_ring->next_to_use = ntu; + + *sq = (struct libeth_xdpsq) { + .count = tx_ring->count, + .descs = tx_ring->desc, + .lock = &tx_ring->xdpq_lock, + .ntu = &tx_ring->next_to_use, + /* This value is ignored in shared queues */ + .pending = &tx_ring->pending, + .pool = NULL, + .sqes = tx_ring->xdp_sqes, + }; + + return num_unused; +} + +static inline void +ixgbevf_xdp_tx_unprep(void *xdpsq, struct libeth_xdpsq *sq __always_unused) +{ + ixgbevf_xdp_tx_put_nq(xdpsq); +} + static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) { struct ixgbevf_ring *xdp_ring = xdpsq; union ixgbe_adv_tx_desc *desc; + bool is_shared; u32 ltu; - libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + is_shared = !test_bit(__IXGBEVF_TX_XDP_RING, &xdp_ring->state); + + if (is_shared) + ixgbevf_xdp_tx_get_nq(xdp_ring); + else + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || xdp_ring->cached_ntu == xdp_ring->next_to_use) @@ -153,7 +238,10 @@ static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); unlock: - libeth_xdpsq_unlock(&xdp_ring->xdpq_lock); + if (is_shared) + ixgbevf_xdp_tx_put_nq(xdp_ring); + else + libeth_xdpsq_unlock(&xdp_ring->xdpq_lock); } #endif /* _IXGBEVF_XDP_LIB_H_ */ From 32e0787bb858f75d333198dafa73579f6bc07128 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 20 May 2026 12:44:17 +0200 Subject: [PATCH 09/13] [TMP] ixgbevf: report timeout and force shared queues --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 367860107661ff..cabdd45478149f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -254,10 +254,14 @@ static void ixgbevf_tx_timeout_reset(struct 
ixgbevf_adapter *adapter) * @netdev: network interface device structure * @txqueue: transmit queue hanging (unused) **/ -static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); + struct ixgbevf_ring *tx_ring = adapter->tx_ring[txqueue]; + u32 ntc = tx_ring->next_to_clean; + netdev_err(netdev, "Timeout info:\n"); + netdev_err(netdev, "ntu=%u, ntc=%u, rs_idx=%u, cached_ntu=%u\n", tx_ring->next_to_use, ntc, tx_ring->tx_sqes[ntc].rs_idx, tx_ring->cached_ntu); ixgbevf_tx_timeout_reset(adapter); } @@ -2280,10 +2284,10 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) adapter->num_rx_queues = min_t(u32, adapter->num_req_qpairs, max_qpairs); adapter->num_tx_queues = adapter->num_rx_queues; - if (adapter->num_rx_queues + adapter->num_tx_queues <= - adapter->q_caps.max_txqs && adapter->xdp_prog) - adapter->num_xdp_queues = adapter->num_rx_queues; - else + // if (adapter->num_rx_queues + adapter->num_tx_queues <= + // adapter->q_caps.max_txqs && adapter->xdp_prog) + // adapter->num_xdp_queues = adapter->num_rx_queues; + // else adapter->num_xdp_queues = 0; } } From f67df558ada0b3296d91e3504a1580e096cb0dbf Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 3 Jul 2026 12:56:57 +0200 Subject: [PATCH 10/13] libeth: xsk: do not flush bulk, if it contains no frames When an AF_XDP ZC socket utilizes Rx much more than Tx, libeth_xsk_xmit_do_bulk() will often be called without any frames to xmit. This leads to unnecessary calls to the prep() callback, which in case of shared queues includes locking. Furthermore, ixgbevf HW requires proper context descriptors for transmission. This is not a big deal, when the ring is XDP-only, since one descriptor at the start is enough.
But when a single ring is shared between XDP and regular skb processing, the context needs to be reset before each bulk, i.e. from the prep() callback. This leads to multiple unused context descriptors on the ring, which triggers an MDD event for this VF. Check whether we actually have any buffers to transmit before flushing the bulk, and hence prepping the Tx queue. Signed-off-by: Larysa Zaremba --- include/net/libeth/xsk.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 644bfa02994ab0..2b32ed046f892c 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -300,6 +300,8 @@ __libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, xsk_clear_tx_need_wakeup(pool); n = xsk_tx_peek_release_desc_batch(pool, budget); + if (unlikely(!n)) + return true; bulk = container_of(&pool->tx_descs[0], typeof(*bulk), desc); libeth_xdp_tx_xmit_bulk(bulk, xdpsq, n, true, From 65539043301d8489a03be66822fd5b6350c4c12c Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 3 Jul 2026 14:08:03 +0200 Subject: [PATCH 11/13] fixup!
libeth: add option to force XDP queue sharing when initializing bulk --- include/net/libeth/xsk.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 2b32ed046f892c..fa66b740994ca4 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -339,6 +339,10 @@ __libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, false, \ __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) +#define libeth_xsk_tx_init_bulk_shared(bq, prog, dev, xdpsqs, num) \ + __libeth_xdp_tx_init_bulk(bq, prog, dev, xdpsqs, num, true, true, \ + __UNIQUE_ID(bq_), __UNIQUE_ID(nqs_)) + struct libeth_xdp_buff *libeth_xsk_buff_add_frag(struct libeth_xdp_buff *head, struct libeth_xdp_buff *xdp); From 4186f6971dc7d457a135989465144cb874f2d8e7 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 3 Jul 2026 14:20:06 +0200 Subject: [PATCH 12/13] fixup! ixgbevf: implement AF_XDP ZC initialization --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index a16e88048f8313..9bb4477f43e979 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -37,6 +37,8 @@ static void ixgbevf_qp_dis(struct ixgbevf_adapter *adapter, u16 qid) ixgbevf_single_irq_disable(adapter, q_vector->v_idx); napi_disable(&q_vector->napi); + synchronize_net(); + ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[qid]); ixgbevf_clean_rx_ring(rx_ring); ixgbevf_rx_destroy_pp(rx_ring); From 21dc491eb121f70d83b094fa0df9d184a4f6f9f1 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 3 Jul 2026 14:25:41 +0200 Subject: [PATCH 13/13] ixgbevf: support AF_XDP ZC on shared queues Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 102 +++++++++--------- 
.../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 2 +- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 55 +++++++--- 3 files changed, 94 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index cabdd45478149f..1a386b345978bd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -265,51 +265,68 @@ static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) ixgbevf_tx_timeout_reset(adapter); } -/** - * ixgbevf_clean_tx_irq - Reclaim resources after transmit completes - * @q_vector: board private structure - * @tx_ring: tx ring to clean - * @napi_budget: Used to determine if we are in netpoll - **/ -static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, - struct ixgbevf_ring *tx_ring, int napi_budget) +static u16 ixgbevf_clean_tx_num(struct ixgbevf_ring *tx_ring, bool in_napi, + u16 to_clean, + struct libeth_sq_napi_stats *stats, + u16 *total_ipsec) { - u16 budget = tx_ring->count / 2, to_clean, ntc = tx_ring->next_to_clean; - struct ixgbevf_adapter *adapter = q_vector->adapter; - struct libeth_sq_napi_stats stats = { }; + bool xsk_ring = ring_is_xsk(tx_ring); + u16 ntc = tx_ring->next_to_clean; struct xdp_frame_bulk xdp_bulk; - unsigned int total_ipsec = 0; struct libeth_cq_pp cq = { - .ss = &stats, + .ss = stats, .dev = tx_ring->dev, - .napi = true, + .napi = in_napi, .bq = &xdp_bulk, }; - - if (test_bit(__IXGBEVF_DOWN, &adapter->state)) - return true; + u32 xsk_frames = 0; xdp_frame_bulk_init(&xdp_bulk); - - to_clean = ixgbevf_desc_used(tx_ring); - to_clean = ixgbevf_tx_get_num_sent(tx_ring, min_t(u16, budget, to_clean)); - budget = budget > to_clean ? 
budget - to_clean : 0; - for (int i = 0; i < to_clean; i++) { struct ixgbevf_skb_sqe_priv *priv; struct libeth_sqe *sqe; sqe = &tx_ring->tx_sqes[ntc]; priv = (void *)&sqe->priv; - if (priv->tx_flags & IXGBE_TX_FLAGS_IPSEC) - total_ipsec++; + if (priv->tx_flags & IXGBE_TX_FLAGS_IPSEC && + likely(total_ipsec)) + (*total_ipsec)++; + xsk_frames += xsk_ring && !sqe->type ? 1 : 0; libeth_tx_complete_any(sqe, &cq); if (unlikely(++ntc == tx_ring->count)) ntc = 0; } + xdp_flush_frame_bulk(&xdp_bulk); + if (xsk_frames) + xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); + + return ntc; +} + +/** + * ixgbevf_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: board private structure + * @tx_ring: tx ring to clean + * @napi_budget: Used to determine if we are in netpoll + **/ +static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + u16 budget = tx_ring->count / 2, to_clean, ntc; + struct libeth_sq_napi_stats stats = { }; + u16 total_ipsec = 0; + + if (test_bit(__IXGBEVF_DOWN, &adapter->state)) + return true; + + to_clean = ixgbevf_desc_used(tx_ring); + to_clean = ixgbevf_tx_get_num_sent(tx_ring, min_t(u16, budget, to_clean)); + budget = budget > to_clean ? 
budget - to_clean : 0; + ntc = ixgbevf_clean_tx_num(tx_ring, true, to_clean, &stats, &total_ipsec); smp_wmb(); @@ -725,13 +742,13 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { + if (!ring_is_xdp(ring)) + clean_complete &= + ixgbevf_clean_tx_irq(q_vector, ring, budget); if (ring_is_xsk(ring)) clean_complete &= ixgbevf_clean_xsk_tx_irq(q_vector, ring, budget); - else if (!ring_is_xdp(ring)) - clean_complete &= - ixgbevf_clean_tx_irq(q_vector, ring, budget); } if (budget <= 0) @@ -1139,10 +1156,12 @@ void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) */ static struct xsk_buff_pool *ixgbevf_xsk_pool_from_q(struct ixgbevf_ring *ring) { + struct ixgbevf_adapter *adapter = ring->q_vector->adapter; struct xsk_buff_pool *pool = xsk_get_pool_from_qid(ring->netdev, ring->queue_index); - if (!READ_ONCE(ring->xdp_prog) && !ring_is_xdp(ring)) + if (!rcu_dereference(ring->xdp_prog) && !ring_is_xdp(ring) && + !(adapter->xdp_prog && !adapter->num_xdp_queues)) return NULL; return (pool && pool->dev) ? 
pool : NULL; @@ -1210,7 +1229,8 @@ void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, num_possible_cpus() > adapter->num_xdp_queues); ring->xsk_pool = ixgbevf_xsk_pool_from_q(ring); - if (ring_is_xdp(ring) && ring->xsk_pool) + if (adapter->xdp_prog && + (!adapter->num_xdp_queues || ring_is_xdp(ring)) && ring->xsk_pool) set_ring_xsk(ring); else clear_ring_xsk(ring); @@ -2028,29 +2048,9 @@ void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { struct libeth_sq_napi_stats stats = { }; - u16 ntc = tx_ring->next_to_clean; - struct xdp_frame_bulk xdp_bulk; - struct libeth_cq_pp cq = { - .dev = tx_ring->dev, - .ss = &stats, - .bq = &xdp_bulk, - }; - - xdp_frame_bulk_init(&xdp_bulk); - - tx_ring->pending = ixgbevf_desc_used(tx_ring); - - for (int i = 0; i < tx_ring->pending; i++) { - struct libeth_sqe *sqe; - sqe = &tx_ring->tx_sqes[ntc]; - - libeth_tx_complete_any(sqe, &cq); - - if (unlikely(++ntc == tx_ring->count)) - ntc = 0; - } - xdp_flush_frame_bulk(&xdp_bulk); + ixgbevf_clean_tx_num(tx_ring, false, ixgbevf_desc_used(tx_ring), + &stats, NULL); /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index d3dd6cd9f48dcb..452b0013898918 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -187,7 +187,7 @@ static inline u32 ixgbevf_prep_tx_sq(void *xdpsq, struct libeth_xdpsq *sq) .ntu = &tx_ring->next_to_use, /* This value is ignored in shared queues */ .pending = &tx_ring->pending, - .pool = NULL, + .pool = tx_ring->xsk_pool, .sqes = tx_ring->xdp_sqes, }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 9bb4477f43e979..f3edeae6aa00a6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ 
b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -208,15 +208,33 @@ static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, LIBETH_XDP_DEFINE_START(); LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, ixgbevf_xsk_xmit_desc); -LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, ixgbevf_xsk_flush_tx); +__LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx_shared, + ixgbevf_prep_tx_sq, ixgbevf_xsk_xmit_desc, + libeth_xdp_tx_unprep, xsk); +LIBETH_XDP_DEFINE_END(); + +static bool ixgbevf_xsk_flush_common(struct libeth_xdp_tx_bulk *bq, u32 flags) +{ + const struct ixgbevf_ring *ring = bq->xdpsq; + + if (test_bit(__IXGBEVF_TX_XDP_RING, &ring->state)) + return ixgbevf_xsk_flush_tx(bq, flags); + else + return ixgbevf_xsk_flush_tx_shared(bq, flags); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, + ixgbevf_xsk_flush_common); LIBETH_XSK_DEFINE_FINALIZE(static ixgbevf_xsk_finalize_xdp_napi, - ixgbevf_xsk_flush_tx, ixgbevf_xdp_rs_and_bump); + ixgbevf_xsk_flush_common, ixgbevf_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { struct ixgbevf_adapter *adapter = q_vector->adapter; + int num_xdp_queues = adapter->num_xdp_queues; u32 total_rx_bytes = 0, total_rx_packets = 0; LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); struct libeth_xdp_buff *head_xdp; @@ -227,10 +245,15 @@ u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, if (wake) xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); - head_xdp = rx_ring->xsk_xdp_head; - libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, - adapter->netdev, adapter->xdp_ring, - adapter->num_xdp_queues); + if (num_xdp_queues) + libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + num_xdp_queues); + else + libeth_xsk_tx_init_bulk_shared(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, + adapter->tx_ring, + 
adapter->num_tx_queues); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -323,10 +346,19 @@ bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, { u32 budget = min_t(u32, napi_budget, tx_ring->thresh); - return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, budget, - NULL, ixgbevf_prep_xdp_sq, - ixgbevf_xsk_xmit_desc, - ixgbevf_xdp_rs_and_bump); + if (ring_is_xdp(tx_ring)) + return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, + budget, NULL, + ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc, + ixgbevf_xdp_rs_and_bump); + else + return __libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, + budget, NULL, + ixgbevf_prep_tx_sq, + ixgbevf_xsk_xmit_desc, + ixgbevf_xdp_rs_and_bump, + ixgbevf_xdp_tx_unprep); } int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) @@ -338,9 +370,6 @@ int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) return -ENETDOWN; - if (unlikely(queue_id >= adapter->num_xdp_queues)) - return -EINVAL; - rx_ring = adapter->rx_ring[queue_id]; if (unlikely(!ring_is_xsk(rx_ring))) return -EINVAL;