From 6dc49d197f67ec2fe5b5adacc0621b1a33201634 Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 13 Apr 2026 12:12:32 +0200 Subject: [PATCH 01/10] ixgbe: remove legacy rx As was done for the virtual function driver (ixgbevf), remove ixgbe_construct_skb(), the legacy-rx private flag, and all ring_uses_build_skb() conditionals. build_skb is now the only Rx code path. This is preparation for the conversion to libeth and page_pool-based Rx buffer management. Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 9 +- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 13 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 113 +++--------------- 3 files changed, 19 insertions(+), 116 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index dce4936708eb44..44f5fc502e6f29 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -312,7 +312,6 @@ struct ixgbe_rx_queue_stats { enum ixgbe_ring_state_t { __IXGBE_RX_3K_BUFFER, - __IXGBE_RX_BUILD_SKB_ENABLED, __IXGBE_RX_RSC_ENABLED, __IXGBE_RX_CSUM_UDP_ZERO_ERR, __IXGBE_RX_FCOE, @@ -324,9 +323,6 @@ enum ixgbe_ring_state_t { __IXGBE_TX_DISABLED, }; -#define ring_uses_build_skb(ring) \ - test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &(ring)->state) - struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; @@ -456,8 +452,7 @@ static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) return IXGBE_RXBUFFER_3K; #if (PAGE_SIZE < 8192) - if (ring_uses_build_skb(ring)) - return IXGBE_MAX_2K_FRAME_BUILD_SKB; + return IXGBE_MAX_2K_FRAME_BUILD_SKB; #endif return IXGBE_RXBUFFER_2K; } @@ -672,7 +667,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) -#define IXGBE_FLAG2_RX_LEGACY BIT(16) +/* BIT16 used to be reserved 
for legacy RX flag */ #define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) #define IXGBE_FLAG2_VF_IPSEC_ENABLED BIT(18) #define IXGBE_FLAG2_AUTO_DISABLE_VF BIT(19) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 2ad81f687a844a..eaf50ce47fe5e1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -132,11 +132,9 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBE_TEST_LEN sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IXGBE_PRIV_FLAGS_LEGACY_RX BIT(0) - "legacy-rx", -#define IXGBE_PRIV_FLAGS_VF_IPSEC_EN BIT(1) +#define IXGBE_PRIV_FLAGS_VF_IPSEC_EN BIT(0) "vf-ipsec", -#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF BIT(2) +#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF BIT(1) "mdd-disable-vf", }; @@ -3667,9 +3665,6 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev) struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); u32 priv_flags = 0; - if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) - priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX; - if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED) priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN; @@ -3685,10 +3680,6 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) unsigned int flags2 = adapter->flags2; unsigned int i; - flags2 &= ~IXGBE_FLAG2_RX_LEGACY; - if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) - flags2 |= IXGBE_FLAG2_RX_LEGACY; - flags2 &= ~IXGBE_FLAG2_VF_IPSEC_ENABLED; if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN) flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c58051e4350be2..7a4b22d813ba38 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1732,7 +1732,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring 
*ring, static unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring) { - return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0; + return IXGBE_SKB_PAD; } static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, @@ -2034,24 +2034,14 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - if (ring_uses_build_skb(rx_ring)) { - unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; - unsigned long offset = (unsigned long)(skb->data) & mask; + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; + unsigned long offset = (unsigned long)(skb->data) & mask; - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - offset, - skb_headlen(skb), - DMA_FROM_DEVICE); - } else { - skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; - - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - skb_frag_off(frag), - skb_frag_size(frag), - DMA_FROM_DEVICE); - } + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + offset, + skb_headlen(skb), + DMA_FROM_DEVICE); /* If the page was released, just unmap it. */ if (unlikely(IXGBE_CB(skb)->page_released)) { @@ -2208,9 +2198,7 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = rx_ring->rx_offset ? 
- SKB_DATA_ALIGN(rx_ring->rx_offset + size) : - SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(IXGBE_SKB_PAD + size); #endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); @@ -2291,65 +2279,6 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, rx_buffer->skb = NULL; } -static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - /* Note, we get here by enabling legacy-rx via: - * - * ethtool --set-priv-flags legacy-rx on - * - * In this mode, we currently get 0 extra XDP headroom as - * opposed to having legacy-rx off, where we process XDP - * packets going to stack via ixgbe_build_skb(). The latter - * provides us currently with 192 bytes of headroom. - * - * For ixgbe_construct_skb() mode it means that the - * xdp->data_meta will always point to xdp->data, since - * the helper cannot expand the head. Should this ever - * change in future for legacy-rx mode on, then lets also - * add xdp->data_meta handling here. 
- */ - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - if (size > IXGBE_RX_HDR_SIZE) { - if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) - IXGBE_CB(skb)->dma = rx_buffer->dma; - - skb_add_rx_frag(skb, 0, rx_buffer->page, - xdp->data - page_address(rx_buffer->page), - size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - memcpy(__skb_put(skb, size), - xdp->data, ALIGN(size, sizeof(long))); - rx_buffer->pagecnt_bias++; - } - - return skb; -} - static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, struct xdp_buff *xdp, @@ -2460,10 +2389,8 @@ static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring, #if (PAGE_SIZE < 8192) truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ #else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); + truesize = SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); #endif return truesize; } @@ -2567,12 +2494,9 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_bytes += size; } else if (skb) { ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { + } else { skb = ixgbe_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); - } else { - skb = ixgbe_construct_skb(rx_ring, rx_buffer, - &xdp, rx_desc); } /* exit if we failed to retrieve a buffer */ @@ -4557,8 +4481,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, * This can happen in SRIOV mode when the MTU of the VF is * higher than the MTU of the PF. 
*/ - if (ring_uses_build_skb(ring) && - !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + if (!test_bit(__IXGBE_RX_3K_BUFFER, ring->state)) rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB | IXGBE_RXDCTL_RLPML_EN; #endif @@ -4733,8 +4656,7 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) rx_ring = adapter->rx_ring[i]; clear_ring_rsc_enabled(rx_ring); - clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + clear_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); @@ -4742,11 +4664,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) - continue; - - set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); - #if (PAGE_SIZE < 8192) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); @@ -7335,7 +7252,7 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) */ static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) { - if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + if (PAGE_SIZE >= 8192) return IXGBE_RXBUFFER_2K; else return IXGBE_RXBUFFER_3K; From 3536c94a089503af33fb2f538e21f276423f4acb Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Thu, 7 May 2026 23:33:54 +0200 Subject: [PATCH 02/10] ixgbe: do not share pages between packets Stop sharing pages between multiple Rx buffers. Convert ixgbe Rx path from page-sharing/recycling to a simple one-page-per-packet model. Remove logic that refers to page-sharing buffer management. This is a preparatory step before converting the Rx path to page_pool and libeth. For XDP_DROP/XDP_ABORTED the buffer reaches ixgbe_put_rx_buffer() with the page still attached, so free the page there; otherwise it would never be freed. 
Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 92 +--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 418 +++--------------- 2 files changed, 68 insertions(+), 442 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 44f5fc502e6f29..c9e59ca4d05b09 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -69,65 +69,13 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ -#define IXGBE_RXBUFFER_1536 1536 -#define IXGBE_RXBUFFER_2K 2048 #define IXGBE_RXBUFFER_3K 3072 #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ #define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the 3K - * buffers. - */ -#if (PAGE_SIZE < 8192) -#define IXGBE_MAX_2K_FRAME_BUILD_SKB (IXGBE_RXBUFFER_1536 - NET_IP_ALIGN) -#define IXGBE_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + IXGBE_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K)) - -static inline int ixgbe_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int ixgbe_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. 
- * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. - */ - if (IXGBE_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = IXGBE_RXBUFFER_3K + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = IXGBE_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return ixgbe_compute_pad(rx_buf_len); -} - -#define IXGBE_SKB_PAD ixgbe_skb_pad() -#else #define IXGBE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif /* * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we @@ -274,11 +222,9 @@ struct ixgbe_tx_buffer { struct ixgbe_rx_buffer { union { struct { - struct sk_buff *skb; dma_addr_t dma; struct page *page; __u32 page_offset; - __u16 pagecnt_bias; }; struct { bool discard; @@ -311,7 +257,6 @@ struct ixgbe_rx_queue_stats { #define IXGBE_TS_HDR_LEN 8 enum ixgbe_ring_state_t { - __IXGBE_RX_3K_BUFFER, __IXGBE_RX_RSC_ENABLED, __IXGBE_RX_CSUM_UDP_ZERO_ERR, __IXGBE_RX_FCOE, @@ -378,12 +323,9 @@ struct ixgbe_ring { unsigned long last_rx_timestamp; - union { - u16 next_to_alloc; - struct { - u8 atr_sample_rate; - u8 atr_count; - }; + struct { + u8 atr_sample_rate; + u8 atr_count; }; u8 dcb_tc; @@ -393,7 +335,7 @@ struct ixgbe_ring { struct ixgbe_tx_queue_stats tx_stats; struct ixgbe_rx_queue_stats rx_stats; }; - u16 rx_offset; + struct sk_buff *skb; /* partial Rx skb across non-EOP */ struct xdp_rxq_info xdp_rxq; spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; @@ -442,31 +384,6 @@ struct ixgbe_ring_feature { #define IXGBE_82599_VMDQ_4Q_MASK 0x7C #define IXGBE_82599_VMDQ_2Q_MASK 0x7E -/* - * FCoE requires that all Rx buffers be over 2200 bytes in length. Since - * this is twice the size of a half page we need to double the page order - * for FCoE enabled Rx queues. 
- */ -static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) -{ - if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) - return IXGBE_RXBUFFER_3K; -#if (PAGE_SIZE < 8192) - return IXGBE_MAX_2K_FRAME_BUILD_SKB; -#endif - return IXGBE_RXBUFFER_2K; -} - -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) - return 1; -#endif - return 0; -} -#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) - #define IXGBE_ITR_ADAPTIVE_MIN_INC 2 #define IXGBE_ITR_ADAPTIVE_MIN_USECS 10 #define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 @@ -914,7 +831,6 @@ struct ixgbe_cb { }; dma_addr_t dma; u16 append_cnt; - bool page_released; }; #define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7a4b22d813ba38..eda8cbd11fe82a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -592,7 +592,6 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) struct my_u0 { u64 a; u64 b; } *u0; struct ixgbe_ring *rx_ring; union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *rx_buffer_info; int i = 0; if (!netif_msg_hw(adapter)) @@ -790,14 +789,12 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_info("------------------------------------\n"); pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("%s%s%s\n", + pr_info("%s%s\n", "R [desc] [ PktBuf A0] ", - "[ HeadBuf DD] [bi->dma ] [bi->skb ] ", - "<-- Adv Rx Read format"); - pr_info("%s%s%s\n", + "[ HeadBuf DD] <-- Adv Rx Read format"); + pr_info("%s%s\n", "RWB[desc] [PcsmIpSHl PtRs] ", - "[vl er S cks ln] ---------------- [bi->skb ] ", - "<-- Adv Rx Write-Back format"); + "[vl er S cks ln] <-- Adv Rx Write-Back format"); for (i = 0; i < rx_ring->count; i++) { const char *ring_desc; @@ -809,34 
+806,20 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) else ring_desc = ""; - rx_buffer_info = &rx_ring->rx_buffer_info[i]; rx_desc = IXGBE_RX_DESC(rx_ring, i); u0 = (struct my_u0 *)rx_desc; if (rx_desc->wb.upper.length) { - /* Descriptor Done */ - pr_info("RWB[0x%03X] %016llX %016llX ---------------- %p%s\n", + pr_info("RWB[0x%03X] %016llX %016llX%s\n", i, le64_to_cpu((__force __le64)u0->a), le64_to_cpu((__force __le64)u0->b), - rx_buffer_info->skb, ring_desc); } else { - pr_info("R [0x%03X] %016llX %016llX %016llX %p%s\n", + pr_info("R [0x%03X] %016llX %016llX%s\n", i, le64_to_cpu((__force __le64)u0->a), le64_to_cpu((__force __le64)u0->b), - (u64)rx_buffer_info->dma, - rx_buffer_info->skb, ring_desc); - - if (netif_msg_pktdata(adapter) && - rx_buffer_info->dma) { - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - page_address(rx_buffer_info->page) + - rx_buffer_info->page_offset, - ixgbe_rx_bufsz(rx_ring), true); - } } } } @@ -1730,10 +1713,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, } } -static unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring) -{ - return IXGBE_SKB_PAD; -} static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) @@ -1741,29 +1720,19 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct page *page = bi->page; dma_addr_t dma; - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - /* alloc new page for storage */ - page = dev_alloc_pages(ixgbe_rx_pg_order(rx_ring)); + page = dev_alloc_page(); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); - /* - * if mapping failed free memory back to system since - * there isn't much 
point in holding memory we can't use - */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ixgbe_rx_pg_order(rx_ring)); + __free_page(page); rx_ring->rx_stats.alloc_rx_page_failed++; return false; @@ -1771,9 +1740,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = rx_ring->rx_offset; - page_ref_add(page, USHRT_MAX - 1); - bi->pagecnt_bias = USHRT_MAX; + bi->page_offset = rx_ring->xdp_prog ? XDP_PACKET_HEADROOM : + IXGBE_SKB_PAD; rx_ring->rx_stats.alloc_rx_page++; return true; @@ -1789,7 +1757,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; - u16 bufsz; /* nothing to do */ if (!cleaned_count) @@ -1799,15 +1766,14 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - bufsz = ixgbe_rx_bufsz(rx_ring); - do { if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, bufsz, + bi->page_offset, + IXGBE_RXBUFFER_3K, DMA_FROM_DEVICE); /* @@ -1836,9 +1802,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) if (rx_ring->next_to_use != i) { rx_ring->next_to_use = i; - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. 
(Only * applicable for weak-ordered memory model archs, @@ -1972,8 +1935,6 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_buffer_info[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -2021,36 +1982,6 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, skb->tail += pull_len; } -/** - * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being updated - * - * This function provides a basic DMA sync up for the first fragment of an - * skb. The reason for doing this is that the first fragment cannot be - * unmapped until we have reached the end of packet descriptor for a buffer - * chain. - */ -static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, - struct sk_buff *skb) -{ - unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; - unsigned long offset = (unsigned long)(skb->data) & mask; - - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - offset, - skb_headlen(skb), - DMA_FROM_DEVICE); - - /* If the page was released, just unmap it. 
*/ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - } -} /** * ixgbe_cleanup_headers - Correct corrupted or empty headers @@ -2101,80 +2032,10 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return false; #endif - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; return false; } -/** - * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *old_buff) -{ - struct ixgbe_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* Transfer page from old buffer to new buffer. - * Move each member individually to avoid possible store - * forwarding stalls and unnecessary copy of skb. - */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, - int rx_buffer_pgcnt) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) - return false; -#else - /* The last offset is a bit aggressive in that we assume the - * worst case of FCoE being enabled and using a 3K buffer. 
- * However this should have minimal impact as the 1K extra is - * still less than one buffer in size. - */ -#define IXGBE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K) - if (rx_buffer->page_offset > IXGBE_LAST_OFFSET) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(pagecnt_bias == 1)) { - page_ref_add(page, USHRT_MAX - 1); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -2183,100 +2044,45 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, * @size: size of data in rx_buffer * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. 
**/ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(IXGBE_SKB_PAD + size); -#endif + unsigned int truesize = SKB_DATA_ALIGN(rx_buffer->page_offset + size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif } static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff **skb, - const unsigned int size, - int *rx_buffer_pgcnt) + const unsigned int size) { struct ixgbe_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - *rx_buffer_pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buffer->page); -#else - 0; -#endif prefetchw(rx_buffer->page); - *skb = rx_buffer->skb; - /* Delay unmapping of the first packet. It carries the header - * information, HW may still access the header after the writeback. 
- * Only unmap it when EOP is reached - */ - if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) { - if (!*skb) - goto skip_sync; - } else { - if (*skb) - ixgbe_dma_sync_frag(rx_ring, *skb); - } - - /* we are reusing so sync this buffer for CPU use */ + /* sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, size, DMA_FROM_DEVICE); -skip_sync: - rx_buffer->pagecnt_bias--; return rx_buffer; } static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb, - int rx_buffer_pgcnt) + bool put_page_flag) { - if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - } - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of rx_buffer */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); + if (put_page_flag) + put_page(rx_buffer->page); rx_buffer->page = NULL; - rx_buffer->skb = NULL; } static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, @@ -2285,13 +2091,9 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc) { unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + SKB_DATA_ALIGN(xdp->data_end - xdp->data_hard_start); -#endif struct sk_buff *skb; /* Prefetch first cache line of first page. 
If xdp->data_meta @@ -2316,13 +2118,6 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) IXGBE_CB(skb)->dma = rx_buffer->dma; - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - return skb; } @@ -2381,31 +2176,6 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, return result; } -static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring, - unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -#endif - return truesize; -} - -static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} /** * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf @@ -2424,29 +2194,25 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, const int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbe_adapter *adapter = q_vector->adapter; #ifdef IXGBE_FCOE int ddp_bytes; unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); - unsigned int offset = rx_ring->rx_offset; + unsigned int offset = rx_ring->xdp_prog ? 
XDP_PACKET_HEADROOM : + IXGBE_SKB_PAD; unsigned int xdp_xmit = 0; struct xdp_buff xdp; + struct sk_buff *skb = rx_ring->skb; int xdp_res = 0; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_init_buff(&xdp, PAGE_SIZE, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *rx_buffer; - struct sk_buff *skb; - int rx_buffer_pgcnt; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -2466,7 +2232,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); - rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt); + rx_buffer = ixgbe_get_rx_buffer(rx_ring, size); /* retrieve a buffer from the ring */ if (!skb) { @@ -2476,20 +2242,13 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, rx_buffer->page_offset - offset; xdp_prepare_buff(&xdp, hard_start, offset, size, true); xdp_buff_clear_frags_flag(&xdp); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size); -#endif xdp_res = ixgbe_run_xdp(adapter, rx_ring, &xdp); } if (xdp_res) { - if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) { + if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) xdp_xmit |= xdp_res; - ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size); - } else { - rx_buffer->pagecnt_bias++; - } + total_rx_packets++; total_rx_bytes += size; } else if (skb) { @@ -2502,11 +2261,13 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; + ixgbe_put_rx_buffer(rx_ring, rx_buffer, true); + cleaned_count++; break; } - ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt); + 
ixgbe_put_rx_buffer(rx_ring, rx_buffer, + xdp_res == IXGBE_XDP_CONSUMED); cleaned_count++; /* place incomplete frames back on ring for completion */ @@ -2554,6 +2315,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_packets++; } + rx_ring->skb = skb; + if (xdp_xmit & IXGBE_XDP_REDIR) xdp_do_flush(); @@ -4097,10 +3860,8 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - } else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) { - srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; } else { - srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; } /* configure descriptor type */ @@ -4471,24 +4232,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; -#if (PAGE_SIZE < 8192) - /* RXDCTL.RLPML does not work on 82599 */ - } else if (hw->mac.type != ixgbe_mac_82599EB) { - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); - - /* Limit the maximum frame size so we don't overrun the skb. - * This can happen in SRIOV mode when the MTU of the VF is - * higher than the MTU of the PF. 
- */ - if (!test_bit(__IXGBE_RX_3K_BUFFER, ring->state)) - rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB | - IXGBE_RXDCTL_RLPML_EN; -#endif } - ring->rx_offset = ixgbe_rx_offset(ring); - if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); @@ -4499,10 +4244,6 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, ring->rx_buf_len = xsk_buf_len; } - /* initialize rx_buffer_info */ - memset(ring->rx_buffer_info, 0, - sizeof(struct ixgbe_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ rx_desc = IXGBE_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -4619,8 +4360,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - struct ixgbe_ring *rx_ring; - int i; u32 mhadd, hlreg0; #ifdef IXGBE_FCOE @@ -4628,7 +4367,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && (max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE)) max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE; - #endif /* IXGBE_FCOE */ /* adjust max frame to be at least the size of a standard frame */ @@ -4644,35 +4382,11 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) } hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); - /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ + /* set jumbo enable since MHADD.MFS is keeping size locked at + * max_frame + */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); - - /* - * Setup the HW Rx Head and Tail Descriptor Pointers and - * the Base and Length of the Rx Descriptor Ring - */ - for (i = 0; i < adapter->num_rx_queues; i++) { - rx_ring = adapter->rx_ring[i]; - - clear_ring_rsc_enabled(rx_ring); - clear_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); - - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) - set_ring_rsc_enabled(rx_ring); - - 
if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - -#if (PAGE_SIZE < 8192) - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - - if (IXGBE_2K_TOO_SMALL_WITH_PADDING || - (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); -#endif - } } static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter) @@ -4757,8 +4471,15 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) * Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbe_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; + + clear_ring_rsc_enabled(rx_ring); + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + set_ring_rsc_enabled(rx_ring); + + ixgbe_configure_rx_ring(adapter, rx_ring); + } rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); /* disable drop enable for 82598 parts */ @@ -5664,35 +5385,28 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) goto skip_free; } - /* Free all the Rx ring sk_buffs */ - while (i != rx_ring->next_to_alloc) { - if (rx_buffer->skb) { - struct sk_buff *skb = rx_buffer->skb; - if (IXGBE_CB(skb)->page_released) - dma_unmap_page_attrs(rx_ring->dev, - IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - dev_kfree_skb(skb); - } + /* Free Rx ring sk_buff */ + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring pages */ + while (i != rx_ring->next_to_use) { /* Invalidate cache lines that may have been written to by * device so that we avoid corrupting memory. 
*/ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), + IXGBE_RXBUFFER_3K, DMA_FROM_DEVICE); /* free resources associated with mapping */ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, + PAGE_SIZE, DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); + __free_page(rx_buffer->page); i++; rx_buffer++; @@ -5703,7 +5417,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) } skip_free: - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -7252,10 +6965,7 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) */ static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) { - if (PAGE_SIZE >= 8192) - return IXGBE_RXBUFFER_2K; - else - return IXGBE_RXBUFFER_3K; + return IXGBE_RXBUFFER_3K; } /** @@ -10820,11 +10530,11 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (ring_is_rsc_enabled(ring)) return -EINVAL; - - if (frame_size > ixgbe_rx_bufsz(ring)) - return -EINVAL; } + if (frame_size > IXGBE_RXBUFFER_3K) + return -EINVAL; + /* if the number of cpus is much larger than the maximum of queues, * we should stop it and then return with ENOMEM like before. */ From 4d7562ca0e8eaa41d382e8859519d20844ab1f63 Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Thu, 7 May 2026 23:34:48 +0200 Subject: [PATCH 03/10] ixgbe: use libeth in Rx processing Convert ixgbe Rx path from manual page-per-packet DMA management to libeth/page_pool infrastructure, and integrate the libeth XDP helpers. Each packet gets its own buffer instead of sharing pages between packets.
Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe.h | 35 +- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 23 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 459 +++++++----------- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 19 +- 6 files changed, 213 insertions(+), 326 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b513baf3cbb296..072133b4a4416d 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,6 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select LIBETH_XDP select LIBIE_FWLOG if DEBUG_FS select MDIO select NET_DEVLINK diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index c9e59ca4d05b09..2d69ed0df4464f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -33,6 +33,7 @@ #include "ixgbe_ipsec.h" #include +#include /* common prefix used by pr_<> macros */ #undef pr_fmt @@ -73,9 +74,8 @@ #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ -#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) - -#define IXGBE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#define IXGBE_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) /* * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we @@ -219,18 +219,9 @@ struct ixgbe_tx_buffer { u32 tx_flags; }; -struct ixgbe_rx_buffer { - union { - struct { - dma_addr_t dma; - struct page *page; - __u32 page_offset; - }; - struct { - bool discard; - struct xdp_buff *xdp; - }; - }; +struct ixgbe_xsk_rx_buffer { + bool discard; + struct xdp_buff *xdp; }; struct ixgbe_queue_stats { @@ -298,17 +289,22 @@ struct ixgbe_ring { struct 
ixgbe_ring *next; /* pointer to next ring in q_vector */ struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ struct net_device *netdev; /* netdev ring belongs to */ - struct bpf_prog *xdp_prog; - struct device *dev; /* device for DMA mapping */ + struct bpf_prog __rcu *xdp_prog; + union { + struct page_pool *pp; /* Rx ring */ + struct device *dev; /* Tx ring */ + }; void *desc; /* descriptor ring memory */ union { + struct libeth_fqe *rx_fqes; + struct ixgbe_xsk_rx_buffer *rx_xsk_buffer_info; struct ixgbe_tx_buffer *tx_buffer_info; - struct ixgbe_rx_buffer *rx_buffer_info; }; unsigned long state; u8 __iomem *tail; dma_addr_t dma; /* phys. address of descriptor ring */ unsigned int size; /* length in bytes */ + u32 truesize; u16 count; /* amount of descriptors */ @@ -340,7 +336,7 @@ struct ixgbe_ring { spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ - u16 rx_buf_len; + u32 rx_buf_len; } ____cacheline_internodealigned_in_smp; enum ixgbe_ring_f_enum { @@ -384,6 +380,7 @@ struct ixgbe_ring_feature { #define IXGBE_82599_VMDQ_4Q_MASK 0x7C #define IXGBE_82599_VMDQ_2Q_MASK 0x7E + #define IXGBE_ITR_ADAPTIVE_MIN_INC 2 #define IXGBE_ITR_ADAPTIVE_MIN_USECS 10 #define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index eaf50ce47fe5e1..41e17d64ca3013 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1949,7 +1949,6 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) /* Setup Rx Descriptor ring and Rx buffers */ rx_ring->count = IXGBE_DEFAULT_RXD; rx_ring->queue_index = 0; - rx_ring->dev = &adapter->pdev->dev; rx_ring->netdev = adapter->netdev; rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx; @@ -2056,14 +2055,16 @@ static void ixgbe_create_lbtest_frame(struct sk_buff *skb, skb->data[frame_size + 12] 
= 0xAF; } -static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer, +static bool ixgbe_check_lbtest_frame(const struct libeth_fqe *rx_buffer, unsigned int frame_size) { + u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; unsigned char *data; frame_size >>= 1; - data = page_address(rx_buffer->page) + rx_buffer->page_offset; + data = page_address(__netmem_to_page(rx_buffer->netmem)) + + rx_buffer->offset + hr; return data[3] == 0xFF && data[frame_size + 10] == 0xBE && data[frame_size + 12] == 0xAF; @@ -2111,16 +2112,13 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, } while (rx_desc->wb.upper.length) { - struct ixgbe_rx_buffer *rx_buffer; + struct libeth_fqe *rx_buffer; /* check Rx buffer */ - rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; + rx_buffer = &rx_ring->rx_fqes[rx_ntc]; /* sync Rx buffer for CPU read */ - dma_sync_single_for_cpu(rx_ring->dev, - rx_buffer->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + libeth_rx_sync_for_cpu(rx_buffer, rx_ring->rx_buf_len); /* verify contents of skb */ if (ixgbe_check_lbtest_frame(rx_buffer, size)) @@ -2128,11 +2126,8 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, else break; - /* sync Rx buffer for device write */ - dma_sync_single_for_device(rx_ring->dev, - rx_buffer->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* recycle the page back to the pool */ + libeth_rx_recycle_slow(rx_buffer->netmem); /* increment Rx next to clean counter */ rx_ntc++; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index eda8cbd11fe82a..aa682cb23b2b02 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -174,6 +175,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); MODULE_IMPORT_NS("LIBIE_FWLOG"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); 
+MODULE_IMPORT_NS("LIBETH"); MODULE_LICENSE("GPL v2"); DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); @@ -1508,7 +1510,7 @@ static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter, u8 reg_idx = rx_ring->reg_idx; if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) - rxctrl = dca3_get_tag(rx_ring->dev, cpu); + rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); switch (hw->mac.type) { case ixgbe_mac_82599EB: @@ -1713,40 +1715,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, } } - -static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* alloc new page for storage */ - page = dev_alloc_page(); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); - - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = rx_ring->xdp_prog ? 
XDP_PACKET_HEADROOM : - IXGBE_SKB_PAD; - rx_ring->rx_stats.alloc_rx_page++; - - return true; -} - /** * ixgbe_alloc_rx_buffers - Replace used receive buffers * @rx_ring: ring to place buffers on @@ -1755,40 +1723,34 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *bi; - u16 i = rx_ring->next_to_use; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; + u16 ntu = rx_ring->next_to_use; /* nothing to do */ if (!cleaned_count) return; - - rx_desc = IXGBE_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; + rx_desc = IXGBE_RX_DESC(rx_ring, ntu); do { - if (!ixgbe_alloc_mapped_page(rx_ring, bi)) + dma_addr_t addr; + + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) break; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - IXGBE_RXBUFFER_3K, - DMA_FROM_DEVICE); - - /* - * Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. 
- */ + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; - i++; - if (unlikely(!i)) { + ntu++; + + if (unlikely(ntu == rx_ring->count)) { rx_desc = IXGBE_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; + ntu = 0; } /* clear the length for the next_to_use descriptor */ @@ -1797,18 +1759,16 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) cleaned_count--; } while (cleaned_count); - i += rx_ring->count; - - if (rx_ring->next_to_use != i) { - rx_ring->next_to_use = i; - + if (likely(rx_ring->next_to_use != ntu)) { + /* record the next descriptor to use */ + rx_ring->next_to_use = ntu; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). */ wmb(); - writel(i, rx_ring->tail); + writel(ntu, rx_ring->tail); } } @@ -1880,8 +1840,6 @@ void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, else macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true, false); - - skb->protocol = eth_type_trans(skb, dev); } void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, @@ -1982,7 +1940,6 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, skb->tail += pull_len; } - /** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on @@ -2044,86 +2001,28 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, * @size: size of data in rx_buffer * * This function will add the data contained in rx_buffer->page to the skb. + * The buffer is attached to the skb as a page fragment via + * skb_add_rx_frag_netmem(); no data is copied. The fragment offset is the + * buffer offset plus the page pool's configured offset. + * + * The function returns nothing and does not decide whether the buffer can + * be reused by the adapter.
**/ -static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *rx_buffer, +static void ixgbe_add_rx_frag(const struct libeth_fqe *rx_buffer, struct sk_buff *skb, unsigned int size) { - unsigned int truesize = SKB_DATA_ALIGN(rx_buffer->page_offset + size); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); -} - -static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, - const unsigned int size) -{ - struct ixgbe_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - /* sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); + u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - return rx_buffer; + skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->netmem, rx_buffer->offset + hr, + size, rx_buffer->truesize); } -static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *rx_buffer, - bool put_page_flag) -{ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); - if (put_page_flag) - put_page(rx_buffer->page); - rx_buffer->page = NULL; -} - -static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); - struct sk_buff *skb; - - /* Prefetch first cache line of first page. If xdp->data_meta - * is unused, this points exactly as xdp->data, otherwise we - * likely have a consumer accessing first few bytes of meta - * data, and then actual data. 
- */ - net_prefetch(xdp->data_meta); - - /* build an skb to around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); - if (unlikely(!skb)) - return NULL; - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - /* record DMA address if this is the start of a chain of buffers */ - if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) - IXGBE_CB(skb)->dma = rx_buffer->dma; - - return skb; -} static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, - struct xdp_buff *xdp) + struct libeth_xdp_buff *xdp) { int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; @@ -2136,14 +2035,14 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, if (!xdp_prog) goto xdp_out; - prefetchw(xdp->data_hard_start); /* xdp_frame write */ + prefetchw(xdp->base.data_hard_start); /* xdp_frame write */ - act = bpf_prog_run_xdp(xdp_prog, xdp); + act = bpf_prog_run_xdp(xdp_prog, &xdp->base); switch (act) { case XDP_PASS: break; case XDP_TX: - xdpf = xdp_convert_buff_to_frame(xdp); + xdpf = xdp_convert_buff_to_frame(&xdp->base); if (unlikely(!xdpf)) goto out_failure; ring = ixgbe_determine_xdp_ring(adapter); @@ -2156,7 +2055,7 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, goto out_failure; break; case XDP_REDIRECT: - err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + err = xdp_do_redirect(adapter->netdev, &xdp->base, xdp_prog); if (err) goto out_failure; result = IXGBE_XDP_REDIR; @@ -2170,13 +2069,13 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: result = IXGBE_XDP_CONSUMED; + libeth_xdp_return_buff(xdp); break; } xdp_out: return result; } - /** * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @q_vector: structure containing interrupt and ring information 
@@ -2201,18 +2100,17 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); - unsigned int offset = rx_ring->xdp_prog ? XDP_PACKET_HEADROOM : - IXGBE_SKB_PAD; - unsigned int xdp_xmit = 0; - struct xdp_buff xdp; + LIBETH_XDP_ONSTACK_BUFF(xdp); struct sk_buff *skb = rx_ring->skb; + unsigned int xdp_xmit = 0; int xdp_res = 0; - xdp_init_buff(&xdp, PAGE_SIZE, &rx_ring->xdp_rxq); + xdp->data = NULL; + xdp->base.rxq = &rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *rx_buffer; + struct libeth_fqe *rx_buffer; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -2232,18 +2130,16 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); - rx_buffer = ixgbe_get_rx_buffer(rx_ring, size); + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; /* retrieve a buffer from the ring */ if (!skb) { - unsigned char *hard_start; + libeth_xdp_process_buff(xdp, rx_buffer, size); + xdp_res = ixgbe_run_xdp(adapter, rx_ring, xdp); + } else { + libeth_rx_sync_for_cpu(rx_buffer, size); + } - hard_start = page_address(rx_buffer->page) + - rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); - xdp_buff_clear_frags_flag(&xdp); - xdp_res = ixgbe_run_xdp(adapter, rx_ring, &xdp); - } if (xdp_res) { if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) @@ -2252,10 +2148,9 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_packets++; total_rx_bytes += size; } else if (skb) { - ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size); + ixgbe_add_rx_frag(rx_buffer, skb, size); } else { - skb = ixgbe_build_skb(rx_ring, rx_buffer, - &xdp, rx_desc); + skb = xdp_build_skb_from_buff(&xdp->base); } /* exit if we failed to retrieve a buffer */ @@ -2266,8 +2161,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, 
break; } - ixgbe_put_rx_buffer(rx_ring, rx_buffer, - xdp_res == IXGBE_XDP_CONSUMED); cleaned_count++; /* place incomplete frames back on ring for completion */ @@ -2275,8 +2168,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, continue; /* verify the packet layout is correct */ - if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) + if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -2304,12 +2199,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } if (!ddp_bytes) { dev_kfree_skb_any(skb); + skb = NULL; continue; } } #endif /* IXGBE_FCOE */ ixgbe_rx_skb(q_vector, skb); + skb = NULL; /* update budget accounting */ total_rx_packets++; @@ -3825,11 +3722,11 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, - struct ixgbe_ring *rx_ring) + struct ixgbe_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; - u8 reg_idx = rx_ring->reg_idx; + u8 reg_idx = ring->reg_idx; if (hw->mac.type == ixgbe_mac_82598EB) { u16 mask = adapter->ring_feature[RING_F_RSS].mask; @@ -3844,25 +3741,12 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, /* configure header buffer length, needed for RSC */ srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - /* configure the packet buffer length */ - if (rx_ring->xsk_pool) { - u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool); - - /* If the MAC support setting RXDCTL.RLPML, the - * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and - * RXDCTL.RLPML is set to the actual UMEM buffer - * size. If not, then we are stuck with a 1k buffer - * size resolution. In this case frames larger than - * the UMEM buffer size viewed in a 1k resolution will - * be dropped. 
- */ - if (hw->mac.type != ixgbe_mac_82599EB) - srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - } else { - srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - } + if (ring->xsk_pool) + srrctl |= DIV_ROUND_UP(xsk_pool_get_rx_frame_size(ring->xsk_pool), + IXGBE_SRRCTL_BSIZEPKT_STEP); + else + srrctl |= DIV_ROUND_UP(ring->rx_buf_len, + IXGBE_SRRCTL_BSIZEPKT_STEP); /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -4196,8 +4080,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, NULL)); xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); } else { - WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, NULL)); + xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, ring->pp); } /* disable queue to avoid use of these values while updating state */ @@ -4219,7 +4102,6 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_RDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_RDT(reg_idx); - ixgbe_configure_srrctl(adapter, ring); ixgbe_configure_rscctl(adapter, ring); if (hw->mac.type == ixgbe_mac_82598EB) { @@ -4235,15 +4117,17 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, } if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { - u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); - - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); - rxdctl |= xsk_buf_len | IXGBE_RXDCTL_RLPML_EN; + u32 pkt_len = + READ_ONCE(adapter->netdev->mtu) + LIBETH_RX_LL_LEN; - ring->rx_buf_len = xsk_buf_len; + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); + if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) + rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; } + ixgbe_configure_srrctl(adapter, ring); + + /* initialize Rx descriptor 0 */ rx_desc = IXGBE_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -4355,40 +4239,6 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter 
*adapter) } } -static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - u32 mhadd, hlreg0; - -#ifdef IXGBE_FCOE - /* adjust max frame to be able to do baby jumbo for FCoE */ - if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && - (max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE)) - max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE; -#endif /* IXGBE_FCOE */ - - /* adjust max frame to be at least the size of a standard frame */ - if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) - max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN); - - mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); - if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { - mhadd &= ~IXGBE_MHADD_MFS_MASK; - mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT; - - IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); - } - - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); - /* set jumbo enable since MHADD.MFS is keeping size locked at - * max_frame - */ - hlreg0 |= IXGBE_HLREG0_JUMBOEN; - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); -} - static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -4442,8 +4292,12 @@ static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter) static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + struct ixgbe_ring *rx_ring; int i; u32 rxctrl, rfctl; + u32 mhadd, hlreg0; /* disable receives while setting up the descriptors */ hw->mac.ops.disable_rx(hw); @@ -4464,15 +4318,38 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) /* Program registers for the distribution of queues */ ixgbe_setup_mrqc(adapter); - /* set_rx_buffer_len must be called before ring initialization */ - ixgbe_set_rx_buffer_len(adapter); +#ifdef IXGBE_FCOE + /* adjust max frame to be able to do baby jumbo for 
FCoE */ + if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && + (max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE)) + max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE; +#endif /* IXGBE_FCOE */ + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN); + + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT; + + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + /* set jumbo enable since MHADD.MFS is keeping size locked at + * max_frame + */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg0 |= IXGBE_HLREG0_JUMBOEN; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); /* * Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; + rx_ring = adapter->rx_ring[i]; clear_ring_rsc_enabled(rx_ring); if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) @@ -5377,9 +5254,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) **/ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) { - u16 i = rx_ring->next_to_clean; - struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; - if (rx_ring->xsk_pool) { ixgbe_xsk_clean_rx_ring(rx_ring); goto skip_free; @@ -5391,29 +5265,12 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) rx_ring->skb = NULL; } - /* Free all the Rx ring pages */ - while (i != rx_ring->next_to_use) { - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IXGBE_RXBUFFER_3K, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - __free_page(rx_buffer->page); + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; - i++; - rx_buffer++; - if (i == rx_ring->count) { + libeth_rx_recycle_slow(rx_fqe->netmem); + if (unlikely(++i == rx_ring->count)) i = 0; - rx_buffer = rx_ring->rx_buffer_info; - } } skip_free: @@ -6789,54 +6646,80 @@ static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring) { - struct device *dev = rx_ring->dev; + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBE_RX_PAGE_LEN(rx_ring->xdp_prog ? 
+ LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + struct device *dev = &adapter->pdev->dev; int orig_node = dev_to_node(dev); + void *napi_dev; int ring_node = NUMA_NO_NODE; - int size; - - size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; + int ret; - if (rx_ring->q_vector) + if (rx_ring->q_vector) { + fq.nid = rx_ring->q_vector->numa_node; ring_node = rx_ring->q_vector->numa_node; + napi_dev = &rx_ring->q_vector->napi; + } else { + fq.no_napi = true; + napi_dev = &adapter->pdev->dev; + } - rx_ring->rx_buffer_info = vmalloc_node(size, ring_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vmalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; + ret = libeth_rx_fq_create(&fq, napi_dev); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); set_dev_node(dev, ring_node); - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); set_dev_node(dev, orig_node); if (!rx_ring->desc) rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->desc) - goto err; + &rx_ring->dma, + GFP_KERNEL); + if (!rx_ring->desc) { + ret = -ENOMEM; + goto err_destroy_fq; + } rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) - goto err; + ret = xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, + ixgbe_rx_napi_id(rx_ring)); + if (ret < 0) + goto err_free_desc; WRITE_ONCE(rx_ring->xdp_prog, adapter->xdp_prog); return 0; -err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; + 
+err_free_desc: + dma_free_coherent(dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + rx_ring->desc = NULL; +err_destroy_fq: dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + return ret; } /** @@ -6923,21 +6806,31 @@ static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) **/ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + ixgbe_clean_rx_ring(rx_ring); rx_ring->xdp_prog = NULL; xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; /* if not set, then don't free */ if (!rx_ring->desc) return; - dma_free_coherent(rx_ring->dev, rx_ring->size, - rx_ring->desc, rx_ring->dma); + dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, + rx_ring->dma); rx_ring->desc = NULL; + + kvfree(rx_ring->rx_xsk_buffer_info); + rx_ring->rx_xsk_buffer_info = NULL; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; } /** @@ -6980,7 +6873,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (ixgbe_enabled_xdp_adapter(adapter)) { - int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; + int new_frame_size = new_mtu + LIBETH_RX_LL_LEN; if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { e_warn(probe, "Requested MTU size is not supported with XDP\n"); @@ -10512,7 +10405,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int i, frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct bpf_prog *old_prog; bool need_reset; diff --git 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index b1bfeb21537acc..825cf17fa1878d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2452,7 +2452,7 @@ enum { #define IXGBE_PSRTYPE_L2HDR 0x00001000 /* SRRCTL bit definitions */ -#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_BSIZEPKT_STEP 1024 #define IXGBE_SRRCTL_RDMTS_SHIFT 22 #define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 #define IXGBE_SRRCTL_DROP_EN 0x10000000 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 7b941505a9d024..15425d3c367872 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -151,7 +151,7 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *bi; + struct ixgbe_xsk_rx_buffer *bi; u16 i = rx_ring->next_to_use; dma_addr_t dma; bool ok = true; @@ -161,7 +161,7 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) return true; rx_desc = IXGBE_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; + bi = &rx_ring->rx_xsk_buffer_info[i]; i -= rx_ring->count; do { @@ -183,7 +183,7 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) i++; if (unlikely(!i)) { rx_desc = IXGBE_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; + bi = rx_ring->rx_xsk_buffer_info; i -= rx_ring->count; } @@ -257,7 +257,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *bi; + struct ixgbe_xsk_rx_buffer *bi; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -279,17 +279,17 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, */ 
dma_rmb(); - bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + bi = &rx_ring->rx_xsk_buffer_info[rx_ring->next_to_clean]; if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) { - struct ixgbe_rx_buffer *next_bi; + struct ixgbe_xsk_rx_buffer *next_bi; xsk_buff_free(bi->xdp); bi->xdp = NULL; ixgbe_inc_ntc(rx_ring); next_bi = - &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + &rx_ring->rx_xsk_buffer_info[rx_ring->next_to_clean]; next_bi->discard = true; continue; } @@ -345,6 +345,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, total_rx_bytes += skb->len; total_rx_packets++; + skb->protocol = eth_type_trans(skb, rx_ring->netdev); ixgbe_process_skb_fields(rx_ring, rx_desc, skb); ixgbe_rx_skb(q_vector, skb); } @@ -374,11 +375,11 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring) { - struct ixgbe_rx_buffer *bi; + struct ixgbe_xsk_rx_buffer *bi; u16 i; for (i = 0; i < rx_ring->count; i++) { - bi = &rx_ring->rx_buffer_info[i]; + bi = &rx_ring->rx_xsk_buffer_info[i]; if (!bi->xdp) continue; From 97d12450db9635e0c7aa08b79011095838338b2b Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Thu, 28 May 2026 15:03:24 +0200 Subject: [PATCH 04/10] ixgbe: branch prediction and cleanup * Added branch prediction hints to the ixgbe Rx path and cleaned up the packet accounting. * Moved total_rx_bytes and total_rx_packets accounting together. 
Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index aa682cb23b2b02..21c1642a1c2794 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1732,7 +1732,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) u16 ntu = rx_ring->next_to_use; /* nothing to do */ - if (!cleaned_count) + if (unlikely(!cleaned_count)) return; rx_desc = IXGBE_RX_DESC(rx_ring, ntu); @@ -1748,7 +1748,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) rx_desc++; ntu++; - if (unlikely(ntu == rx_ring->count)) { + if (unlikely(ntu == fq.count)) { rx_desc = IXGBE_RX_DESC(rx_ring, 0); ntu = 0; } @@ -2121,7 +2121,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); - if (!size) + if (unlikely(!size)) break; /* This memory barrier is needed to keep us from reading @@ -2154,7 +2154,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { + if (unlikely(!xdp_res && !skb)) { rx_ring->rx_stats.alloc_rx_buff_failed++; ixgbe_put_rx_buffer(rx_ring, rx_buffer, true); cleaned_count++; @@ -2168,14 +2168,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, continue; /* verify the packet layout is correct */ - if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || + unlikely(ixgbe_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - /* populate checksum, timestamp, VLAN, and protocol */ ixgbe_process_skb_fields(rx_ring, rx_desc, skb); @@ -2205,11 
+2203,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + total_rx_packets++; + ixgbe_rx_skb(q_vector, skb); skb = NULL; - - /* update budget accounting */ - total_rx_packets++; } rx_ring->skb = skb; From d3a0b9de2f4d2a65d3d15b23eb48716fe2e5be09 Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 09:51:36 +0200 Subject: [PATCH 05/10] ixgbe: support XDP multi-buffer on Rx path Implement XDP multi-buffer support for Rx fragmented packets using libeth_xdp. * Added MODULE_IMPORT_NS("LIBETH_XDP") and #include * Replaced struct sk_buff *skb in ixgbe_ring with struct libeth_xdp_buff_stash and ixgbe_clean_rx_ring skb cleanup with libeth_xdp_return_stash * Removed ixgbe_add_rx_frag() * Reorganized ixgbe_clean_rx_irq() and ixgbe_is_non_eop() * Updated ixgbe_xdp_setup() to allow multi-buffer XDP programs * Added NETDEV_XDP_ACT_RX_SG in xdp_features Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 115 +++++++----------- 2 files changed, 48 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 2d69ed0df4464f..c7cdd93d26712a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -34,6 +34,7 @@ #include #include +#include /* common prefix used by pr_<> macros */ #undef pr_fmt @@ -331,12 +332,12 @@ struct ixgbe_ring { struct ixgbe_tx_queue_stats tx_stats; struct ixgbe_rx_queue_stats rx_stats; }; - struct sk_buff *skb; /* partial Rx skb across non-EOP */ struct xdp_rxq_info xdp_rxq; spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ u32 rx_buf_len; + struct libeth_xdp_buff_stash xdp_stash; } ____cacheline_internodealigned_in_smp; enum ixgbe_ring_f_enum { 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 21c1642a1c2794..230e205ba2dee0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -176,6 +176,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); MODULE_IMPORT_NS("LIBIE_FWLOG"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); @@ -1852,7 +1853,7 @@ void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, * ixgbe_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress + * @rsc_append_cnt: pointer to RSC append count accumulator * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -1861,7 +1862,7 @@ void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, **/ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + u32 *rsc_append_cnt) { u32 ntc = rx_ring->next_to_clean + 1; @@ -1880,7 +1881,7 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, u32 rsc_cnt = le32_to_cpu(rsc_enabled); rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; - IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; + *rsc_append_cnt += rsc_cnt - 1; /* update ntc based on RSC value */ ntc = le32_to_cpu(rx_desc->wb.upper.status_error); @@ -1993,32 +1994,6 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return false; } -/** - * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: size of data in rx_buffer - * - * This function will add the 
data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. - **/ -static void ixgbe_add_rx_frag(const struct libeth_fqe *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ - u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - - skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->netmem, rx_buffer->offset + hr, - size, rx_buffer->truesize); -} - static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, @@ -2101,16 +2076,16 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); LIBETH_XDP_ONSTACK_BUFF(xdp); - struct sk_buff *skb = rx_ring->skb; unsigned int xdp_xmit = 0; + u32 rsc_append_cnt = 0; int xdp_res = 0; - xdp->data = NULL; - xdp->base.rxq = &rx_ring->xdp_rxq; + libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; struct libeth_fqe *rx_buffer; + struct sk_buff *skb; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -2131,45 +2106,43 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, dma_rmb(); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + libeth_xdp_process_buff(xdp, rx_buffer, size); - /* retrieve a buffer from the ring */ - if (!skb) { - libeth_xdp_process_buff(xdp, rx_buffer, size); - xdp_res = ixgbe_run_xdp(adapter, rx_ring, xdp); - } else { - libeth_rx_sync_for_cpu(rx_buffer, size); - } - + cleaned_count++; + /* fetch next buffer in frame if non-eop */ + if (ixgbe_is_non_eop(rx_ring, rx_desc, &rsc_append_cnt)) + continue; + xdp_res = ixgbe_run_xdp(adapter, rx_ring, xdp); if 
(xdp_res) { if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) xdp_xmit |= xdp_res; total_rx_packets++; - total_rx_bytes += size; - } else if (skb) { - ixgbe_add_rx_frag(rx_buffer, skb, size); - } else { - skb = xdp_build_skb_from_buff(&xdp->base); + total_rx_bytes += xdp_get_buff_len(&xdp->base); + rsc_append_cnt = 0; + xdp->data = NULL; + continue; } + skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data = NULL; + /* exit if we failed to retrieve a buffer */ - if (unlikely(!xdp_res && !skb)) { + if (unlikely(!skb)) { + rsc_append_cnt = 0; rx_ring->rx_stats.alloc_rx_buff_failed++; ixgbe_put_rx_buffer(rx_ring, rx_buffer, true); cleaned_count++; break; } - cleaned_count++; - - /* place incomplete frames back on ring for completion */ - if (ixgbe_is_non_eop(rx_ring, rx_desc, skb)) - continue; + /* apply RSC append count to skb if accumulated */ + IXGBE_CB(skb)->append_cnt = rsc_append_cnt; + rsc_append_cnt = 0; /* verify the packet layout is correct */ - if (xdp_res || - unlikely(ixgbe_cleanup_headers(rx_ring, rx_desc, skb))) { + if (unlikely(ixgbe_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } @@ -2208,10 +2181,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_packets++; ixgbe_rx_skb(q_vector, skb); - skb = NULL; } - rx_ring->skb = skb; + /* place incomplete frames back on ring for completion */ + libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); if (xdp_xmit & IXGBE_XDP_REDIR) xdp_do_flush(); @@ -5258,11 +5231,8 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) goto skip_free; } - /* Free Rx ring sk_buff */ - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + /* Free Rx ring xdp stash */ + libeth_xdp_return_stash(&rx_ring->xdp_stash); for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; @@ -10402,13 +10372,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, return features; } -static int 
ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { - int i, frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; + u32 frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct bpf_prog *old_prog; bool need_reset; int num_queues; + bool requires_mbuf; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) return -EINVAL; @@ -10417,15 +10389,19 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return -EINVAL; /* verify ixgbe ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) { + for (int i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i]; if (ring_is_rsc_enabled(ring)) return -EINVAL; } - if (frame_size > IXGBE_RXBUFFER_3K) - return -EINVAL; + requires_mbuf = frame_size > IXGBE_RX_PAGE_LEN(LIBETH_XDP_HEADROOM); + if (prog && !prog->aux->xdp_has_frags && requires_mbuf) { + NL_SET_ERR_MSG_MOD(extack, + "Configured MTU requires non-linear frames and XDP prog does not support frags"); + return -EOPNOTSUPP; + } /* if the number of cpus is much larger than the maximum of queues, * we should stop it and then return with ENOMEM like before. 
@@ -10450,7 +10426,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (!prog) xdp_features_clear_redirect_target(dev); } else { - for (i = 0; i < adapter->num_rx_queues; i++) { + for (int i = 0; i < adapter->num_rx_queues; i++) { WRITE_ONCE(adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); } @@ -10465,7 +10441,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (need_reset && prog) { num_queues = min_t(int, adapter->num_rx_queues, adapter->num_xdp_queues); - for (i = 0; i < num_queues; i++) + for (int i = 0; i < num_queues; i++) if (adapter->xdp_ring[i]->xsk_pool) (void)ixgbe_xsk_wakeup(adapter->netdev, i, XDP_WAKEUP_RX); @@ -10481,7 +10457,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: - return ixgbe_xdp_setup(dev, xdp->prog); + return ixgbe_xdp_setup(dev, xdp->prog, xdp->extack); case XDP_SETUP_XSK_POOL: return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool, xdp->xsk.queue_id); @@ -11312,7 +11288,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_XSK_ZEROCOPY; + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; /* MTU range: 68 - 9710 */ netdev->min_mtu = ETH_MIN_MTU; From ee84bfa6393f390be01923da8e6b1ac5eceae40c Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 11:52:27 +0200 Subject: [PATCH 06/10] ixgbe: XDP_TX in multi-buffer through libeth Use libeth to support XDP_TX action for fragmented packets, replacing the legacy per-frame DMA mapping path with libeth page_pool-based bulk submission. Replaced ixgbe_xmit_xdp_ring() with the libeth XDP TX infrastructure. The XDP Tx ring now uses libeth_sqe instead of ixgbe_tx_buffer for its buffer tracking. 
Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 11 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 267 +++++++++++++++--- 2 files changed, 229 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index c7cdd93d26712a..88899ecbc13a0c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -257,6 +257,7 @@ enum ixgbe_ring_state_t { __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, __IXGBE_TX_XDP_RING, + __IXGBE_TX_XDP_RING_PRIMED, __IXGBE_TX_DISABLED, }; @@ -292,20 +293,24 @@ struct ixgbe_ring { struct net_device *netdev; /* netdev ring belongs to */ struct bpf_prog __rcu *xdp_prog; union { - struct page_pool *pp; /* Rx ring */ + struct page_pool *pp; /* Rx and XDP rings */ struct device *dev; /* Tx ring */ }; void *desc; /* descriptor ring memory */ + union { + u32 truesize; /* Rx buffer full size */ + u32 pending; /* Sent-not-completed descriptors */ + }; union { struct libeth_fqe *rx_fqes; struct ixgbe_xsk_rx_buffer *rx_xsk_buffer_info; struct ixgbe_tx_buffer *tx_buffer_info; + struct libeth_sqe *xdp_sqes; }; unsigned long state; u8 __iomem *tail; dma_addr_t dma; /* phys. 
address of descriptor ring */ unsigned int size; /* length in bytes */ - u32 truesize; u16 count; /* amount of descriptors */ @@ -315,8 +320,8 @@ struct ixgbe_ring { * associated with this ring, which is * different for DCB and RSS modes */ - u16 next_to_use; u16 next_to_clean; + u32 next_to_use; unsigned long last_rx_timestamp; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 230e205ba2dee0..28142a05d332aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1374,10 +1374,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_ipsec++; /* free the skb */ - if (ring_is_xdp(tx_ring)) - xdp_return_frame(tx_buffer->xdpf); - else - napi_consume_skb(tx_buffer->skb, napi_budget); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -1432,9 +1429,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_bytes); adapter->tx_ipsec += total_ipsec; - if (ring_is_xdp(tx_ring)) - return !!budget; - if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { if (adapter->hw.mac.type == ixgbe_mac_e610) ixgbe_handle_mdd_event(adapter, tx_ring); @@ -1995,18 +1989,185 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } -static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, +static void ixgbe_clean_xdp_num(struct ixgbe_ring *xdp_ring, bool in_napi, + u16 to_clean) +{ + struct libeth_xdpsq_napi_stats stats = { }; + u32 ntc = xdp_ring->next_to_clean; + struct xdp_frame_bulk cbulk; + struct libeth_cq_pp cp = { + .bq = &cbulk, + .dev = xdp_ring->dev, + .xss = &stats, + .napi = in_napi, + }; + + xdp_frame_bulk_init(&cbulk); + xdp_ring->pending -= to_clean; + + while (likely(to_clean--)) { + libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); + ntc++; + ntc = unlikely(ntc == xdp_ring->count) ? 
0 : ntc; + } + + xdp_ring->next_to_clean = ntc; + xdp_flush_frame_bulk(&cbulk); +} + +static u16 ixgbe_tx_get_num_sent(struct ixgbe_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; + + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; + + if (!idx--) + break; + + rs_desc = IXGBE_TX_DESC(xdp_ring, idx); + + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 0 : idx + 1; + } + + return to_clean; +} + +static void ixgbe_clean_xdp_ring(struct ixgbe_ring *xdp_ring) +{ + ixgbe_clean_xdp_num(xdp_ring, false, xdp_ring->pending); +} + +static u32 ixgbe_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbe_ring *xdp_ring = xdpsq; + + if (unlikely(ixgbe_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { + u16 to_clean = ixgbe_tx_get_num_sent(xdp_ring); + + if (likely(to_clean)) + ixgbe_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBE_TX_XDP_RING_PRIMED, + xdp_ring->state))) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBE_TX_XDP_RING_PRIMED, xdp_ring->state); + + context_desc = IXGBE_TX_CTXTDESC(xdp_ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + + /* Finish descriptor writes before bumping tail */ + wmb(); + writel(1, xdp_ring->tail); + } + + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = NULL, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = NULL, + .sqes = xdp_ring->xdp_sqes, + }; + + return 
ixgbe_desc_unused(xdp_ring); +} + +static void ixgbe_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; + + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; + + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd_type |= IXGBE_TXD_CMD_EOP; + + if (desc.flags & LIBETH_XDP_TX_FIRST) { + struct skb_shared_info *sinfo = sq->sqes[i].sinfo; + u16 full_len = desc.len + sinfo->xdp_frags_size; + + tx_desc->read.olinfo_status = + cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + } + + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbe_xdp_flush_tx, ixgbe_prep_xdp_sq, + ixgbe_xdp_xmit_desc); +LIBETH_XDP_DEFINE_END(); + +static void ixgbe_xdp_set_rs(struct ixgbe_ring *xdp_ring, u32 cached_ntu) +{ + u32 ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; + union ixgbe_adv_tx_desc *desc; + + desc = IXGBE_TX_DESC(xdp_ring, ltu); + xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); +} + +static void ixgbe_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, + bool xdp_xmit, u32 cached_ntu) +{ + struct ixgbe_ring *xdp_ring = tx_bulk->xdpsq; + + if (!xdp_xmit) + goto unlock; + + if (tx_bulk->count) + ixgbe_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); + + ixgbe_xdp_set_rs(xdp_ring, cached_ntu); + + /* Finish descriptor writes before bumping tail */ + wmb(); + writel(xdp_ring->next_to_use, xdp_ring->tail); +unlock: + rcu_read_unlock(); +} + +static int ixgbe_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, + struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, struct libeth_xdp_buff *xdp) { int err, result = IXGBE_XDP_PASS; - struct bpf_prog *xdp_prog; - struct ixgbe_ring *ring; - struct xdp_frame *xdpf; + const struct bpf_prog *xdp_prog; u32 act; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - + xdp_prog = tx_bulk->prog; if (!xdp_prog) goto xdp_out; @@ -2017,17 +2178,10 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_PASS: break; case XDP_TX: - xdpf = xdp_convert_buff_to_frame(&xdp->base); - if (unlikely(!xdpf)) - goto out_failure; - ring = ixgbe_determine_xdp_ring(adapter); - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_lock(&ring->tx_lock); - result = ixgbe_xmit_xdp_ring(ring, xdpf); - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_unlock(&ring->tx_lock); - if (result == IXGBE_XDP_CONSUMED) - goto out_failure; + result = IXGBE_XDP_TX; + if (!libeth_xdp_tx_queue_bulk(tx_bulk, xdp, + ixgbe_xdp_flush_tx)) + result = IXGBE_XDP_CONSUMED; break; case XDP_REDIRECT: err = xdp_do_redirect(adapter->netdev, &xdp->base, xdp_prog); @@ -2075,12 +2229,20 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); + 
u32 rsc_append_cnt = 0; + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); unsigned int xdp_xmit = 0; - u32 rsc_append_cnt = 0; + u32 cached_ntu; int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + if (xdp_tx_bulk.prog) + cached_ntu = + ((struct ixgbe_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -2113,11 +2275,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (ixgbe_is_non_eop(rx_ring, rx_desc, &rsc_append_cnt)) continue; - xdp_res = ixgbe_run_xdp(adapter, rx_ring, xdp); + xdp_res = ixgbe_run_xdp(&xdp_tx_bulk, adapter, rx_ring, xdp); if (xdp_res) { if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) xdp_xmit |= xdp_res; + xdp->data = NULL; total_rx_packets++; total_rx_bytes += xdp_get_buff_len(&xdp->base); rsc_append_cnt = 0; @@ -2189,11 +2352,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (xdp_xmit & IXGBE_XDP_REDIR) xdp_do_flush(); - if (xdp_xmit & IXGBE_XDP_TX) { - struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter); - - ixgbe_xdp_ring_update_tail_locked(ring); - } + ixgbe_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit & IXGBE_XDP_TX, + cached_ntu); ixgbe_update_rx_ring_stats(rx_ring, q_vector, total_rx_packets, total_rx_bytes); @@ -3145,9 +3305,13 @@ int ixgbe_poll(struct napi_struct *napi, int budget) #endif ixgbe_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_pool ? - ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) : - ixgbe_clean_tx_irq(q_vector, ring, budget); + bool wd; + + if (ring_is_xdp(ring) && !ring->xsk_pool) + continue; + wd = ring->xsk_pool ? 
+ ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) : + ixgbe_clean_tx_irq(q_vector, ring, budget); if (!wd) clean_complete = false; @@ -3523,11 +3687,21 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, ring->queue_index); } - clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state); + clear_bit(__IXGBE_HANG_CHECK_ARMED, ring->state); + clear_bit(__IXGBE_TX_XDP_RING_PRIMED, ring->state); /* reinitialize tx_buffer_info */ - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbe_tx_buffer) * ring->count); + if (!ring_is_xdp(ring)) + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbe_tx_buffer) * ring->count); + else + memset(ring->xdp_sqes, 0, + max(sizeof(struct ixgbe_tx_buffer), + sizeof(struct libeth_sqe)) * ring->count); + + ring->next_to_use = 0; + ring->next_to_clean = 0; + ring->pending = 0; /* enable queue */ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl); @@ -6044,10 +6218,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - if (ring_is_xdp(tx_ring)) - xdp_return_frame(tx_buffer->xdpf); - else - dev_kfree_skb_any(tx_buffer->skb); + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -6088,8 +6259,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) } /* reset BQL for queue */ - if (!ring_is_xdp(tx_ring)) - netdev_tx_reset_queue(txring_txq(tx_ring)); + netdev_tx_reset_queue(txring_txq(tx_ring)); out: /* reset next_to_use and next_to_clean */ @@ -6120,7 +6290,7 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbe_clean_tx_ring(adapter->tx_ring[i]); for (i = 0; i < adapter->num_xdp_queues; i++) - ixgbe_clean_tx_ring(adapter->xdp_ring[i]); + ixgbe_clean_xdp_ring(adapter->xdp_ring[i]); } static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) @@ -6519,7 +6689,9 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) int 
ring_node = NUMA_NO_NODE; int size; - size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; + size = (!ring_is_xdp(tx_ring) ? sizeof(struct ixgbe_tx_buffer) : + max(sizeof(struct ixgbe_tx_buffer), + sizeof(struct libeth_sqe))) * tx_ring->count; if (tx_ring->q_vector) ring_node = tx_ring->q_vector->numa_node; @@ -6734,7 +6906,10 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) **/ void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring) { - ixgbe_clean_tx_ring(tx_ring); + if (!ring_is_xdp(tx_ring)) + ixgbe_clean_tx_ring(tx_ring); + else + ixgbe_clean_xdp_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; From 8b4bb7a6564a6285b8622dddf5f9a47af93cdcd3 Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 13:00:54 +0200 Subject: [PATCH 07/10] ixgbe: support XDP_REDIRECT and .ndo_xdp_xmit through libeth * Replaced the driver-specific static_branch locking mechanism with libeth_xdpsq_lock infrastructure for XDP queue sharing. * Updated ixgbe_xdp_xmit() to use libeth_xdp_xmit_do_bulk(). * Adjusted ixgbe_xdp_xmit_desc() to handle both XDP_TX buffers and XDP_XMIT frames. * Updated the XSK path to use libeth_xdpsq_lock/libeth_xdpsq_unlock. 
Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 9 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 182 +++++------------- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 7 +- 3 files changed, 52 insertions(+), 146 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 88899ecbc13a0c..ec3796be4a034a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -338,6 +338,8 @@ struct ixgbe_ring { struct ixgbe_rx_queue_stats rx_stats; }; struct xdp_rxq_info xdp_rxq; + struct libeth_xdpsq_lock xdpq_lock; + u32 cached_ntu; spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ @@ -373,8 +375,6 @@ enum ixgbe_ring_f_enum { #define IXGBE_MAX_TX_DESCRIPTORS 40 #define IXGBE_MAX_TX_VF_HANGS 4 -DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); - struct ixgbe_ring_feature { u16 limit; /* upper limit on feature indices */ u16 indices; /* current value of indices */ @@ -774,10 +774,7 @@ static inline struct ixgbe_adapter *ixgbe_from_netdev(struct net_device *netdev) static inline int ixgbe_determine_xdp_q_idx(int cpu) { - if (static_key_enabled(&ixgbe_xdp_locking_key)) - return cpu % IXGBE_MAX_XDP_QS; - else - return cpu; + return cpu % IXGBE_MAX_XDP_QS; } static inline diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 28142a05d332aa..a4b0803e1f004c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -179,9 +179,6 @@ MODULE_IMPORT_NS("LIBETH"); MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); -DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); -EXPORT_SYMBOL(ixgbe_xdp_locking_key); - static struct workqueue_struct *ixgbe_wq; static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); @@ -2046,12 +2043,14 @@ static u16 
ixgbe_tx_get_num_sent(struct ixgbe_ring *xdp_ring) static void ixgbe_clean_xdp_ring(struct ixgbe_ring *xdp_ring) { ixgbe_clean_xdp_num(xdp_ring, false, xdp_ring->pending); + libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); } static u32 ixgbe_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) { struct ixgbe_ring *xdp_ring = xdpsq; + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if (unlikely(ixgbe_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { u16 to_clean = ixgbe_tx_get_num_sent(xdp_ring); @@ -2085,7 +2084,7 @@ static u32 ixgbe_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) *sq = (struct libeth_xdpsq) { .count = xdp_ring->count, .descs = xdp_ring->desc, - .lock = NULL, + .lock = &xdp_ring->xdpq_lock, .ntu = &xdp_ring->next_to_use, .pending = &xdp_ring->pending, .pool = NULL, @@ -2111,8 +2110,13 @@ static void ixgbe_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, cmd_type |= IXGBE_TXD_CMD_EOP; if (desc.flags & LIBETH_XDP_TX_FIRST) { - struct skb_shared_info *sinfo = sq->sqes[i].sinfo; - u16 full_len = desc.len + sinfo->xdp_frags_size; + struct libeth_sqe *sqe = &sq->sqes[i]; + struct skb_shared_info *sinfo; + u16 full_len; + + sinfo = sqe->type == LIBETH_SQE_XDP_TX ? sqe->sinfo : + xdp_get_shared_info_from_frame(sqe->xdpf); + full_len = desc.len + sinfo->xdp_frags_size; tx_desc->read.olinfo_status = cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | @@ -2123,87 +2127,36 @@ static void ixgbe_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); } -LIBETH_XDP_DEFINE_START(); -LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbe_xdp_flush_tx, ixgbe_prep_xdp_sq, - ixgbe_xdp_xmit_desc); -LIBETH_XDP_DEFINE_END(); - -static void ixgbe_xdp_set_rs(struct ixgbe_ring *xdp_ring, u32 cached_ntu) +static void ixgbe_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) { - u32 ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; + struct ixgbe_ring *xdp_ring = xdpsq; union ixgbe_adv_tx_desc *desc; + u32 ltu; + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + return; + ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; desc = IXGBE_TX_DESC(xdp_ring, ltu); - xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); -} - -static void ixgbe_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - bool xdp_xmit, u32 cached_ntu) -{ - struct ixgbe_ring *xdp_ring = tx_bulk->xdpsq; - - if (!xdp_xmit) - goto unlock; - - if (tx_bulk->count) - ixgbe_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); - - ixgbe_xdp_set_rs(xdp_ring, cached_ntu); + xdp_ring->cached_ntu = xdp_ring->next_to_use; /* Finish descriptor writes before bumping tail */ wmb(); writel(xdp_ring->next_to_use, xdp_ring->tail); -unlock: - rcu_read_unlock(); } -static int ixgbe_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - struct ixgbe_adapter *adapter, - struct ixgbe_ring *rx_ring, - struct libeth_xdp_buff *xdp) -{ - int err, result = IXGBE_XDP_PASS; - const struct bpf_prog *xdp_prog; - u32 act; - - xdp_prog = tx_bulk->prog; - if (!xdp_prog) - goto xdp_out; - - prefetchw(xdp->base.data_hard_start); /* xdp_frame write */ - - act = bpf_prog_run_xdp(xdp_prog, &xdp->base); - switch (act) { - case XDP_PASS: - break; - case XDP_TX: - result = IXGBE_XDP_TX; - if (!libeth_xdp_tx_queue_bulk(tx_bulk, xdp, - ixgbe_xdp_flush_tx)) - result = IXGBE_XDP_CONSUMED; - break; - case XDP_REDIRECT: - err = xdp_do_redirect(adapter->netdev, &xdp->base, xdp_prog); - if (err) - goto out_failure; - result = IXGBE_XDP_REDIR; - break; - default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); - fallthrough; - case XDP_ABORTED: -out_failure: - trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - 
result = IXGBE_XDP_CONSUMED; - libeth_xdp_return_buff(xdp); - break; - } -xdp_out: - return result; -} +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbe_xdp_flush_tx, ixgbe_prep_xdp_sq, + ixgbe_xdp_xmit_desc); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbe_xdp_flush_xmit, ixgbe_prep_xdp_sq, + ixgbe_xdp_xmit_desc); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbe_xdp_run_prog, ixgbe_xdp_flush_tx); +LIBETH_XDP_DEFINE_FINALIZE(static ixgbe_xdp_finalize_xdp_napi, + ixgbe_xdp_flush_tx, ixgbe_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); /** * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf @@ -2232,17 +2185,11 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, u32 rsc_append_cnt = 0; LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); - unsigned int xdp_xmit = 0; - u32 cached_ntu; - int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, adapter->netdev, adapter->xdp_ring, adapter->num_xdp_queues); - if (xdp_tx_bulk.prog) - cached_ntu = - ((struct ixgbe_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -2275,11 +2222,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (ixgbe_is_non_eop(rx_ring, rx_desc, &rsc_append_cnt)) continue; - xdp_res = ixgbe_run_xdp(&xdp_tx_bulk, adapter, rx_ring, xdp); - if (xdp_res) { - if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) - xdp_xmit |= xdp_res; - + if (xdp_tx_bulk.prog && + !ixgbe_xdp_run_prog(xdp, &xdp_tx_bulk)) { xdp->data = NULL; total_rx_packets++; total_rx_bytes += xdp_get_buff_len(&xdp->base); @@ -2349,11 +2293,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* place incomplete frames back on ring for completion */ libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - if (xdp_xmit & IXGBE_XDP_REDIR) - xdp_do_flush(); - - ixgbe_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit & 
IXGBE_XDP_TX, - cached_ntu); + ixgbe_xdp_finalize_xdp_napi(&xdp_tx_bulk); ixgbe_update_rx_ring_stats(rx_ring, q_vector, total_rx_packets, total_rx_bytes); @@ -3691,17 +3631,21 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, clear_bit(__IXGBE_TX_XDP_RING_PRIMED, ring->state); /* reinitialize tx_buffer_info */ - if (!ring_is_xdp(ring)) + if (!ring_is_xdp(ring)) { memset(ring->tx_buffer_info, 0, sizeof(struct ixgbe_tx_buffer) * ring->count); - else + } else { memset(ring->xdp_sqes, 0, max(sizeof(struct ixgbe_tx_buffer), sizeof(struct libeth_sqe)) * ring->count); + libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, + num_possible_cpus() > adapter->num_xdp_queues); + } ring->next_to_use = 0; ring->next_to_clean = 0; ring->pending = 0; + ring->cached_ntu = 0; /* enable queue */ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl); @@ -6669,10 +6613,6 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, set_bit(0, adapter->fwd_bitmask); set_bit(__IXGBE_DOWN, &adapter->state); - /* enable locking for XDP_TX if we have more CPUs than queues */ - if (nr_cpu_ids > IXGBE_MAX_XDP_QS) - static_branch_enable(&ixgbe_xdp_locking_key); - return 0; } @@ -10653,20 +10593,15 @@ void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring) void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring) { - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_lock(&ring->tx_lock); + libeth_xdpsq_lock(&ring->xdpq_lock); ixgbe_xdp_ring_update_tail(ring); - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_unlock(&ring->tx_lock); + libeth_xdpsq_unlock(&ring->xdpq_lock); } static int ixgbe_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); - struct ixgbe_ring *ring; - int nxmit = 0; - int i; if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN; @@ -10675,39 +10610,14 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, !netif_running(adapter->netdev)) 
return -ENETDOWN; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) - return -EINVAL; - - /* During program transitions its possible adapter->xdp_prog is assigned - * but ring has not been configured yet. In this case simply abort xmit. - */ - ring = adapter->xdp_prog ? ixgbe_determine_xdp_ring(adapter) : NULL; - if (unlikely(!ring)) + if (unlikely(!adapter->num_xdp_queues)) return -ENXIO; - if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state))) - return -ENXIO; - - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_lock(&ring->tx_lock); - - for (i = 0; i < n; i++) { - struct xdp_frame *xdpf = frames[i]; - int err; - - err = ixgbe_xmit_xdp_ring(ring, xdpf); - if (err != IXGBE_XDP_TX) - break; - nxmit++; - } - - if (unlikely(flags & XDP_XMIT_FLUSH)) - ixgbe_xdp_ring_update_tail(ring); - - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_unlock(&ring->tx_lock); - - return nxmit; + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, + adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbe_xdp_flush_xmit, + ixgbe_xdp_rs_and_bump); } static const struct net_device_ops ixgbe_netdev_ops = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 15425d3c367872..1c5db24caad28e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "ixgbe.h" #include "ixgbe_txrx_common.h" @@ -126,11 +127,9 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, if (unlikely(!xdpf)) goto out_failure; ring = ixgbe_determine_xdp_ring(adapter); - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_lock(&ring->tx_lock); + libeth_xdpsq_lock(&ring->xdpq_lock); result = ixgbe_xmit_xdp_ring(ring, xdpf); - if (static_branch_unlikely(&ixgbe_xdp_locking_key)) - spin_unlock(&ring->tx_lock); + libeth_xdpsq_unlock(&ring->xdpq_lock); if (result == IXGBE_XDP_CONSUMED) goto out_failure; break; From 
fcb49186e87b788d45280a0faaf842f44efc6245 Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 15:34:38 +0200 Subject: [PATCH 08/10] ixgbe: add pseudo header split Introduce pseudo header split support in the ixgbe driver. * Added IXGBE_RX_SRRCTL_BUF_SIZE macro and IXGBE_FLAG2_HSPLIT flag * Added hdr_truesize, hdr_fqes, hdr_pp, hdr_buf_len to ixgbe_ring * Added ixgbe_rx_hsplit_wa() to copy header from data to header buffer * Added ixgbe_rx_create_pp()/ixgbe_rx_destroy_pp() helpers * Updated ixgbe_alloc_rx_buffers to allocate header buffers * Updated ixgbe_clean_rx_irq to process header split * Refactored ixgbe_setup_rx_resources to use ixgbe_rx_create_pp * Refactored ixgbe_free_rx_resources to use ixgbe_rx_destroy_pp * Set IXGBE_FLAG2_HSPLIT for 82599 in ixgbe_sw_init * Updated ixgbe_xdp_setup to require mbuf frags with hsplit Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 7 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 190 +++++++++++++++--- 2 files changed, 166 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index ec3796be4a034a..629f5b9707bcc7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -77,6 +77,8 @@ #define IXGBE_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ IXGBE_SRRCTL_BSIZEPKT_STEP)) +#define IXGBE_RX_SRRCTL_BUF_SIZE(mtu) (ALIGN((mtu) + LIBETH_RX_LL_LEN, \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) /* * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we @@ -301,6 +303,7 @@ struct ixgbe_ring { u32 truesize; /* Rx buffer full size */ u32 pending; /* Sent-not-completed descriptors */ }; + u32 hdr_truesize; /* Rx header buffer full size */ union { struct libeth_fqe *rx_fqes; struct ixgbe_xsk_rx_buffer *rx_xsk_buffer_info; @@ -337,12 +340,15 @@ struct ixgbe_ring { struct ixgbe_tx_queue_stats tx_stats; struct ixgbe_rx_queue_stats rx_stats; }; + struct libeth_fqe 
*hdr_fqes; + struct page_pool *hdr_pp; struct xdp_rxq_info xdp_rxq; struct libeth_xdpsq_lock xdpq_lock; u32 cached_ntu; spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ + u32 hdr_buf_len; u32 rx_buf_len; struct libeth_xdp_buff_stash xdp_stash; } ____cacheline_internodealigned_in_smp; @@ -596,6 +602,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_MOD_POWER_UNSUPPORTED BIT(22) #define IXGBE_FLAG2_API_MISMATCH BIT(23) #define IXGBE_FLAG2_FW_ROLLBACK BIT(24) +#define IXGBE_FLAG2_HSPLIT BIT(25) /* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a4b0803e1f004c..09dfad725332d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1721,6 +1721,12 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) .truesize = rx_ring->truesize, .count = rx_ring->count, }; + const struct libeth_fq_fp hdr_fq = { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + .truesize = rx_ring->hdr_truesize, + .count = rx_ring->count, + }; u16 ntu = rx_ring->next_to_use; /* nothing to do */ @@ -1737,6 +1743,14 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) rx_desc->read.pkt_addr = cpu_to_le64(addr); + if (hdr_fq.pp) { + addr = libeth_rx_alloc(&hdr_fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + libeth_rx_recycle_slow(fq.fqes[ntu].netmem); + break; + } + } + rx_desc++; ntu++; @@ -2158,6 +2172,32 @@ LIBETH_XDP_DEFINE_FINALIZE(static ixgbe_xdp_finalize_xdp_napi, ixgbe_xdp_flush_tx, ixgbe_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); +static u32 ixgbe_rx_hsplit_wa(const struct libeth_fqe *hdr, + struct libeth_fqe *buf, u32 data_len) +{ + u32 copy = data_len <= L1_CACHE_BYTES ? 
data_len : ETH_HLEN; + struct page *hdr_page, *buf_page; + const void *src; + void *dst; + + if (unlikely(netmem_is_net_iov(buf->netmem)) || + !libeth_rx_sync_for_cpu(buf, copy)) + return 0; + + hdr_page = __netmem_to_page(hdr->netmem); + buf_page = __netmem_to_page(buf->netmem); + + dst = page_address(hdr_page) + hdr->offset + + pp_page_to_nmdesc(hdr_page)->pp->p.offset; + src = page_address(buf_page) + buf->offset + + pp_page_to_nmdesc(buf_page)->pp->p.offset; + + memcpy(dst, src, LARGEST_ALIGN(copy)); + buf->offset += copy; + + return copy; +} + /** * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @q_vector: structure containing interrupt and ring information @@ -2215,6 +2255,23 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, dma_rmb(); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + + if (unlikely(rx_ring->hdr_pp)) { + struct libeth_fqe *hdr_buff; + unsigned int hdr_size = 0; + + hdr_buff = &rx_ring->hdr_fqes[rx_ring->next_to_clean]; + + if (!xdp->data) { + hdr_size = ixgbe_rx_hsplit_wa(hdr_buff, + rx_buffer, + size); + size -= hdr_size ? : size; + } + + libeth_xdp_process_buff(xdp, hdr_buff, hdr_size); + } + libeth_xdp_process_buff(xdp, rx_buffer, size); cleaned_count++; @@ -6510,6 +6567,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, case ixgbe_mac_82599EB: if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_HSPLIT; break; case ixgbe_mac_X540: fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw)); @@ -6717,16 +6775,33 @@ static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) return q_vector ? 
q_vector->napi.napi_id : 0; } -/** - * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) - * @adapter: pointer to ixgbe_adapter - * @rx_ring: rx descriptor ring (for a specific queue) to setup - * - * Returns 0 on success, negative on failure - **/ -int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, - struct ixgbe_ring *rx_ring) +static void ixgbe_rx_destroy_pp(struct ixgbe_ring *rx_ring) { + struct libeth_fq fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + + if (!rx_ring->hdr_pp) + return; + + fq = (struct libeth_fq) { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->hdr_fqes = NULL; + rx_ring->hdr_pp = NULL; +} + +static int ixgbe_rx_create_pp(struct ixgbe_ring *rx_ring, void *napi_dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev); struct libeth_fq fq = { .count = rx_ring->count, .nid = NUMA_NO_NODE, @@ -6736,6 +6811,74 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, LIBETH_XDP_HEADROOM : LIBETH_SKB_HEADROOM), }; + u32 frame_size; + int ret; + + if (rx_ring->q_vector) + fq.nid = rx_ring->q_vector->numa_node; + else + fq.no_napi = true; + + /* Some HW requires DMA write sizes to be aligned to 1K, + * which warrants fake header split usage, but this is + * not an issue if the frame size is at its maximum of 3K + */ + frame_size = + IXGBE_RX_SRRCTL_BUF_SIZE(READ_ONCE(rx_ring->netdev->mtu)); + fq.hsplit = (adapter->flags2 & IXGBE_FLAG2_HSPLIT) && + frame_size < fq.buf_len; + + ret = libeth_rx_fq_create(&fq, napi_dev); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; + + if (!fq.hsplit) + return 0; + + fq = (struct libeth_fq) { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_HDR, + .xdp = !!rx_ring->xdp_prog, + .idx = rx_ring->queue_index, + }; 
+ + if (rx_ring->q_vector) + fq.nid = rx_ring->q_vector->numa_node; + else + fq.no_napi = true; + + ret = libeth_rx_fq_create(&fq, napi_dev); + if (ret) + goto err; + + rx_ring->hdr_pp = fq.pp; + rx_ring->hdr_fqes = fq.fqes; + rx_ring->hdr_truesize = fq.truesize; + rx_ring->hdr_buf_len = fq.buf_len; + + return 0; + +err: + ixgbe_rx_destroy_pp(rx_ring); + return ret; +} + +/** + * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) + * @adapter: pointer to ixgbe_adapter + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, + struct ixgbe_ring *rx_ring) +{ struct device *dev = &adapter->pdev->dev; int orig_node = dev_to_node(dev); void *napi_dev; @@ -6743,23 +6886,16 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, int ret; if (rx_ring->q_vector) { - fq.nid = rx_ring->q_vector->numa_node; ring_node = rx_ring->q_vector->numa_node; napi_dev = &rx_ring->q_vector->napi; } else { - fq.no_napi = true; napi_dev = &adapter->pdev->dev; } - ret = libeth_rx_fq_create(&fq, napi_dev); + ret = ixgbe_rx_create_pp(rx_ring, napi_dev); if (ret) return ret; - rx_ring->pp = fq.pp; - rx_ring->rx_fqes = fq.fqes; - rx_ring->truesize = fq.truesize; - rx_ring->rx_buf_len = fq.buf_len; - /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); @@ -6797,9 +6933,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, rx_ring->desc = NULL; err_destroy_fq: dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ixgbe_rx_destroy_pp(rx_ring); return ret; } @@ -6890,11 +7024,6 @@ static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) **/ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) { - struct libeth_fq fq = { - .fqes = rx_ring->rx_fqes, - 
.pp = rx_ring->pp, - }; - ixgbe_clean_rx_ring(rx_ring); rx_ring->xdp_prog = NULL; @@ -6904,7 +7033,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) if (!rx_ring->desc) return; - dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(rx_ring->pp->p.dev, rx_ring->size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; @@ -6912,9 +7041,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) kvfree(rx_ring->rx_xsk_buffer_info); rx_ring->rx_xsk_buffer_info = NULL; - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ixgbe_rx_destroy_pp(rx_ring); } /** @@ -10511,10 +10638,11 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EINVAL; } - requires_mbuf = frame_size > IXGBE_RX_PAGE_LEN(LIBETH_XDP_HEADROOM); + requires_mbuf = frame_size > IXGBE_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || + adapter->flags2 & IXGBE_FLAG2_HSPLIT; if (prog && !prog->aux->xdp_has_frags && requires_mbuf) { NL_SET_ERR_MSG_MOD(extack, - "Configured MTU requires non-linear frames and XDP prog does not support frags"); + "HW limitations or configured MTU requires non-linear frames and XDP prog does not support frags"); return -EOPNOTSUPP; } From 6313904371b1ea971a7b9e0bd729ecdec2f41bbd Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 16:06:54 +0200 Subject: [PATCH 09/10] ixgbe: reconfigure page pool when reallocating buffers * Moved page pool creation from ixgbe_setup_rx_resources() to ixgbe_configure_rx_ring() * Moved page pool destruction to ixgbe_clean_all_rx_rings() and ixgbe_free_rx_resources() * Added ixgbe_dma_dev_from_ring() helper to obtain DMA device without relying on page pool pointer. 
* Handled XSK path in ixgbe_configure_rx_ring() Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 82 ++++++++++--------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 09dfad725332d3..79d5baf675984c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4218,16 +4218,25 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, u64 rdba = ring->dma; u32 rxdctl; u8 reg_idx = ring->reg_idx; + void *napi_dev; - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + ixgbe_rx_destroy_pp(ring); ring->xsk_pool = ixgbe_xsk_pool(adapter, ring); if (ring->xsk_pool) { + __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ixgbe_rx_napi_id(ring), 0); WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL)); xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); } else { - xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, ring->pp); + if (ring->q_vector) + napi_dev = &ring->q_vector->napi; + else + napi_dev = &adapter->pdev->dev; + + ixgbe_rx_create_pp(ring, napi_dev); } /* disable queue to avoid use of these values while updating state */ @@ -6276,8 +6285,10 @@ static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter) { int i; - for (i = 0; i < adapter->num_rx_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) { ixgbe_clean_rx_ring(adapter->rx_ring[i]); + ixgbe_rx_destroy_pp(adapter->rx_ring[i]); + } } /** @@ -6782,6 +6793,14 @@ static void ixgbe_rx_destroy_pp(struct ixgbe_ring *rx_ring) .fqes = rx_ring->rx_fqes, }; + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + + if (!fq.pp) + return; + libeth_rx_fq_destroy(&fq); rx_ring->rx_fqes = NULL; rx_ring->pp = NULL; @@ -6837,6 +6856,16 @@ static int ixgbe_rx_create_pp(struct 
ixgbe_ring *rx_ring, void *napi_dev) rx_ring->truesize = fq.truesize; rx_ring->rx_buf_len = fq.buf_len; + /* XDP RX-queue info */ + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, + ixgbe_rx_napi_id(rx_ring), + rx_ring->truesize); + if (ret) + goto err; + + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp); + if (!fq.hsplit) return 0; @@ -6869,6 +6898,13 @@ static int ixgbe_rx_create_pp(struct ixgbe_ring *rx_ring, void *napi_dev) return ret; } +static struct device *ixgbe_dma_dev_from_ring(struct ixgbe_ring *ring) +{ + struct ixgbe_adapter *adapter = netdev_priv(ring->netdev); + + return &adapter->pdev->dev; +} + /** * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: pointer to ixgbe_adapter @@ -6881,20 +6917,10 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, { struct device *dev = &adapter->pdev->dev; int orig_node = dev_to_node(dev); - void *napi_dev; int ring_node = NUMA_NO_NODE; - int ret; - if (rx_ring->q_vector) { + if (rx_ring->q_vector) ring_node = rx_ring->q_vector->numa_node; - napi_dev = &rx_ring->q_vector->napi; - } else { - napi_dev = &adapter->pdev->dev; - } - - ret = ixgbe_rx_create_pp(rx_ring, napi_dev); - if (ret) - return ret; /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); @@ -6909,32 +6935,16 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { - ret = -ENOMEM; - goto err_destroy_fq; + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); + return -ENOMEM; } rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - /* XDP RX-queue info */ - ret = xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, - ixgbe_rx_napi_id(rx_ring)); - if (ret < 0) - goto err_free_desc; - WRITE_ONCE(rx_ring->xdp_prog, adapter->xdp_prog); return 0; - -err_free_desc: - dma_free_coherent(dev, rx_ring->size, - rx_ring->desc, rx_ring->dma); 
- rx_ring->desc = NULL; -err_destroy_fq: - dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); - ixgbe_rx_destroy_pp(rx_ring); - return ret; } /** @@ -7025,23 +7035,21 @@ static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) { ixgbe_clean_rx_ring(rx_ring); + ixgbe_rx_destroy_pp(rx_ring); rx_ring->xdp_prog = NULL; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); /* if not set, then don't free */ if (!rx_ring->desc) return; - dma_free_coherent(rx_ring->pp->p.dev, rx_ring->size, rx_ring->desc, - rx_ring->dma); + dma_free_coherent(ixgbe_dma_dev_from_ring(rx_ring), rx_ring->size, + rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; kvfree(rx_ring->rx_xsk_buffer_info); rx_ring->rx_xsk_buffer_info = NULL; - - ixgbe_rx_destroy_pp(rx_ring); } /** From e0b944207e61d31d9d89cebe81ca3844975b666f Mon Sep 17 00:00:00 2001 From: Magdalena Pytel Date: Mon, 1 Jun 2026 16:11:36 +0200 Subject: [PATCH 10/10] ixgbe: allow changing MTU when XDP program is attached Previously, MTU changes were restricted to a fixed max frame size when XDP was active. With libeth page pool and multi-buffer XDP support, the restriction can be relaxed. * Added ixgbe_xdp_mtu_ok() to check if the given MTU is compatible with the attached XDP program. * Updated ixgbe_change_mtu() to use ixgbe_xdp_mtu_ok() instead of the hard-coded IXGBE_RXBUFFER_3K limit. * Updated ixgbe_xdp_setup() to use ixgbe_xdp_mtu_ok() instead of inline requires_mbuf logic.
* Added error handling in ixgbe_configure_rx_ring() Signed-off-by: Magdalena Pytel --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 79d5baf675984c..5219e15d098b28 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1729,8 +1729,8 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) }; u16 ntu = rx_ring->next_to_use; - /* nothing to do */ - if (unlikely(!cleaned_count)) + /* nothing to do or page pool is not present */ + if (unlikely(!cleaned_count || !fq.pp)) return; rx_desc = IXGBE_RX_DESC(rx_ring, ntu); @@ -4219,6 +4219,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, u32 rxdctl; u8 reg_idx = ring->reg_idx; void *napi_dev; + int err; ixgbe_rx_destroy_pp(ring); ring->xsk_pool = ixgbe_xsk_pool(adapter, ring); @@ -4236,7 +4237,13 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, else napi_dev = &adapter->pdev->dev; - ixgbe_rx_create_pp(ring, napi_dev); + err = ixgbe_rx_create_pp(ring, napi_dev); + if (err) { + netdev_err(ring->netdev, + "Failed to create Page Pool for buffer allocation: (%pe), RxQ %d is disabled, driver reload may be needed\n", + ERR_PTR(err), ring->queue_index); + return; + } } /* disable queue to avoid use of these values while updating state */ @@ -7071,13 +7078,16 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) ixgbe_free_rx_resources(adapter->rx_ring[i]); } -/** - * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP - * @adapter: device handle, pointer to adapter - */ -static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) +static bool ixgbe_xdp_mtu_ok(const struct ixgbe_adapter *adapter, + const struct bpf_prog *prog, unsigned int mtu) { - return IXGBE_RXBUFFER_3K; + u32 frame_size = 
mtu + LIBETH_RX_LL_LEN; + bool requires_mbuf; + + requires_mbuf = frame_size > IXGBE_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || + adapter->flags2 & IXGBE_FLAG2_HSPLIT; + + return prog->aux->xdp_has_frags || !requires_mbuf; } /** @@ -7091,13 +7101,11 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); - if (ixgbe_enabled_xdp_adapter(adapter)) { - int new_frame_size = new_mtu + LIBETH_RX_LL_LEN; - - if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { - e_warn(probe, "Requested MTU size is not supported with XDP\n"); - return -EINVAL; - } + if (adapter->xdp_prog && + !ixgbe_xdp_mtu_ok(adapter, adapter->xdp_prog, new_mtu)) { + netdev_warn(netdev, + "MTU value provided cannot be set while current XDP program is attached\n"); + return -EPERM; } /* @@ -10625,12 +10633,10 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - u32 frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct bpf_prog *old_prog; bool need_reset; int num_queues; - bool requires_mbuf; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) return -EINVAL; @@ -10646,9 +10652,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EINVAL; } - requires_mbuf = frame_size > IXGBE_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || - adapter->flags2 & IXGBE_FLAG2_HSPLIT; - if (prog && !prog->aux->xdp_has_frags && requires_mbuf) { + if (prog && !ixgbe_xdp_mtu_ok(adapter, prog, READ_ONCE(dev->mtu))) { NL_SET_ERR_MSG_MOD(extack, "HW limitations or configured MTU requires non-linear frames and XDP prog does not support frags"); return -EOPNOTSUPP;