From 6042a2c828dc1246e5199822826404833e268ddf Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:47 -0800 Subject: [PATCH 01/12] page_pool: Add allocation stats Add per-pool statistics counters for the allocation path of a page pool. These stats are incremented in softirq context, so no locking or per-cpu variables are needed. This code is disabled by default and a kernel config option is provided for users who wish to enable them. The statistics added are: - fast: successful fast path allocations - slow: slow path order-0 allocations - slow_high_order: slow path high order allocations - empty: ptr ring is empty, so a slow path allocation was forced. - refill: an allocation which triggered a refill of the cache - waive: pages obtained from the ptr ring that cannot be added to the cache due to a NUMA mismatch. Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. Miller Signed-off-by: hongrongxuan --- include/net/page_pool.h | 18 ++++++++++++++++++ net/Kconfig | 13 +++++++++++++ net/core/page_pool.c | 24 ++++++++++++++++++++---- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 2b1da328370c..861b39b5215d 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -82,6 +82,19 @@ struct page_pool_params { unsigned int offset; /* DMA addr offset */ }; +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; +#endif + struct page_pool { struct page_pool_params p; @@ -95,6 +108,11 @@ struct page_pool { struct page *frag_page; long frag_users; +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif + /* * Data structure for allocation side * diff --git a/net/Kconfig b/net/Kconfig index a34f45852188..1d4211802ebd 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -440,6 +440,19 @@ config NET_DEVLINK config PAGE_POOL bool +config PAGE_POOL_STATS + default n + bool "Page pool stats" + depends on PAGE_POOL + help + Enable page pool statistics to track page allocation and recycling + in page pools. This option incurs additional CPU cost in allocation + and recycle paths and additional memory cost to store the statistics. + These statistics are only available if this option is enabled and if + the driver using the page pool supports exporting this data. + + If unsure, say N. 
+ config FAILOVER tristate "Generic failover module" help diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 381705fed4db..8751e439f71d 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -26,6 +26,13 @@ #define BIAS_MAX LONG_MAX +#ifdef CONFIG_PAGE_POOL_STATS +/* alloc_stat_inc is intended to be used in softirq context */ +#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) +#else +#define alloc_stat_inc(pool, __stat) +#endif + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -115,8 +122,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) + if (__ptr_ring_empty(r)) { + alloc_stat_inc(pool, empty); return NULL; + } /* Softirq guarantee CPU and thus NUMA node is stable. This, * assumes CPU refilling driver RX-ring will also run RX-NAPI. @@ -146,14 +155,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) * This limit stress on page buddy alloactor. */ page_pool_return_page(pool, page); + alloc_stat_inc(pool, waive); page = NULL; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, refill); + } spin_unlock(&r->consumer_lock); return page; @@ -168,6 +180,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) if (likely(pool->alloc.count)) { /* Fast-path */ page = pool->alloc.cache[--pool->alloc.count]; + alloc_stat_inc(pool, fast); } else { page = page_pool_refill_alloc_cache(pool); } @@ -237,6 +250,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } + alloc_stat_inc(pool, slow_high_order); page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ @@ -291,10 +305,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, } /* Return last page */ - if (likely(pool->alloc.count > 0)) + if (likely(pool->alloc.count > 0)) { page = pool->alloc.cache[--pool->alloc.count]; - else + alloc_stat_inc(pool, slow); + } else { page = NULL; + } /* When page just alloc'ed is should/must have refcnt 1. */ return page; -- Gitee From 22523e51601cd450db095bfd70bdcb85c182d5ad Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:48 -0800 Subject: [PATCH 02/12] page_pool: Add recycle stats Add per-cpu stats tracking page pool recycling events: - cached: recycling placed page in the page pool cache - cache_full: page pool cache was full - ring: page placed into the ptr ring - ring_full: page released from page pool because the ptr ring was full - released_refcnt: page released (and not recycled) because refcnt > 1 Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. 
Miller Signed-off-by: hongrongxuan --- include/net/page_pool.h | 16 ++++++++++++++++ net/core/page_pool.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 861b39b5215d..21f055ddd38c 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -93,6 +93,18 @@ struct page_pool_alloc_stats { u64 refill; /* allocations via successful refill */ u64 waive; /* failed refills due to numa zone mismatch */ }; + +struct page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; #endif struct page_pool { @@ -141,6 +153,10 @@ struct page_pool { */ struct ptr_ring ring; +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif atomic_t pages_state_release_cnt; /* A page_pool is strictly tied to a single RX-queue being diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8751e439f71d..28c3217695d0 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -29,8 +29,15 @@ #ifdef CONFIG_PAGE_POOL_STATS /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) +/* recycle_stat_inc is safe to use when preemption is possible. */ +#define recycle_stat_inc(pool, __stat) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ + this_cpu_inc(s->__stat); \ + } while (0) #else #define alloc_stat_inc(pool, __stat) +#define recycle_stat_inc(pool, __stat) #endif static int page_pool_init(struct page_pool *pool, @@ -78,6 +85,12 @@ static int page_pool_init(struct page_pool *pool, pool->p.flags & PP_FLAG_PAGE_FRAG) return -EINVAL; +#ifdef CONFIG_PAGE_POOL_STATS + pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); + if (!pool->recycle_stats) + return -ENOMEM; +#endif + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; @@ -408,7 +421,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) else ret = ptr_ring_produce_bh(&pool->ring, page); - return (ret == 0) ? true : false; + if (!ret) { + recycle_stat_inc(pool, ring); + return true; + } + + return false; } /* Only allow direct recycling in special circumstances, into the @@ -419,11 +437,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) static bool page_pool_recycle_in_cache(struct page *page, struct page_pool *pool) { - if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) + if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { + recycle_stat_inc(pool, cache_full); return false; + } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ pool->alloc.cache[pool->alloc.count++] = page; + recycle_stat_inc(pool, cached); return true; } @@ -478,6 +499,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * doing refcnt based recycle tricks, meaning another process * will be invoking put_page. 
*/ + recycle_stat_inc(pool, released_refcnt); /* Do not replace this with page_pool_return_page() */ page_pool_release_page(pool, page); put_page(page); @@ -491,6 +513,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { /* Cache full, fallback to free pages */ + recycle_stat_inc(pool, ring_full); page_pool_return_page(pool, page); } } @@ -637,6 +660,9 @@ static void page_pool_free(struct page_pool *pool) if (pool->p.flags & PP_FLAG_DMA_MAP) put_device(pool->p.dev); +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif kfree(pool); } -- Gitee From bdcff160adfa64dee83b97c88d4bb225bf8a8737 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:49 -0800 Subject: [PATCH 03/12] page_pool: Add function to batch and return stats Adds a function page_pool_get_stats which can be used by drivers to obtain stats for a specified page_pool. Signed-off-by: Joe Damato Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: David S. Miller Signed-off-by: hongrongxuan --- include/net/page_pool.h | 17 +++++++++++++++++ net/core/page_pool.c | 25 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 21f055ddd38c..ee375861c6a6 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -105,6 +105,23 @@ struct page_pool_recycle_stats { * refcnt */ }; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. 
+ */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); #endif struct page_pool { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 28c3217695d0..3a7e23a7e6de 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -35,6 +35,31 @@ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ this_cpu_inc(s->__stat); \ } while (0) + +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats) +{ + int cpu = 0; + + if (!stats) + return false; + + memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats)); + + for_each_possible_cpu(cpu) { + const struct page_pool_recycle_stats *pcpu = + per_cpu_ptr(pool->recycle_stats, cpu); + + stats->recycle_stats.cached += pcpu->cached; + stats->recycle_stats.cache_full += pcpu->cache_full; + stats->recycle_stats.ring += pcpu->ring; + stats->recycle_stats.ring_full += pcpu->ring_full; + stats->recycle_stats.released_refcnt += pcpu->released_refcnt; + } + + return true; +} +EXPORT_SYMBOL(page_pool_get_stats); #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) -- Gitee From 20a59ab917f75f398c8a722aa520f8f61dc3e2ef Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Fri, 21 Feb 2020 11:15:19 +0200 Subject: [PATCH 04/12] net: page_pool: Add documentation on page_pool API Add documentation explaining the basic functionality and design principles of the API Signed-off-by: Ilias Apalodimas Acked-by: Randy Dunlap Signed-off-by: David S. Miller Signed-off-by: hongrongxuan --- Documentation/networking/page_pool.rst | 159 +++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 Documentation/networking/page_pool.rst diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst new file mode 100644 index 000000000000..43088ddf95e4 --- /dev/null +++ b/Documentation/networking/page_pool.rst @@ -0,0 +1,159 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Page Pool API +============= + +The page_pool allocator is optimized for the XDP mode that uses one frame +per-page, but it can fallback on the regular page allocator APIs. + +Basic use involves replacing alloc_pages() calls with the +page_pool_alloc_pages() call. Drivers should use page_pool_dev_alloc_pages() +replacing dev_alloc_pages(). + +API keeps track of inflight pages, in order to let API user know +when it is safe to free a page_pool object. Thus, API users +must run page_pool_release_page() when a page is leaving the page_pool or +call page_pool_put_page() where appropriate in order to maintain correct +accounting. + +API user must call page_pool_put_page() once on a page, as it +will either recycle the page, or in case of refcnt > 1, it will +release the DMA mapping and inflight state accounting. + +Architecture overview +===================== + +.. 
code-block:: none + + +------------------+ + | Driver | + +------------------+ + ^ + | + | + | + v + +--------------------------------------------+ + | request memory | + +--------------------------------------------+ + ^ ^ + | | + | Pool empty | Pool has entries + | | + v v + +-----------------------+ +------------------------+ + | alloc (and map) pages | | get page from cache | + +-----------------------+ +------------------------+ + ^ ^ + | | + | cache available | No entries, refill + | | from ptr-ring + | | + v v + +-----------------+ +------------------+ + | Fast cache | | ptr-ring cache | + +-----------------+ +------------------+ + +API interface +============= +The number of pools created **must** match the number of hardware queues +unless hardware restrictions make that impossible. This would otherwise beat the +purpose of page pool, which is allocate pages fast from cache without locking. +This lockless guarantee naturally comes from running under a NAPI softirq. +The protection doesn't strictly have to be NAPI, any guarantee that allocating +a page will cause no race conditions is enough. + +* page_pool_create(): Create a pool. + * flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV + * order: 2^order pages on allocation + * pool_size: size of the ptr_ring + * nid: preferred NUMA node for allocation + * dev: struct device. Used on DMA operations + * dma_dir: DMA direction + * max_len: max DMA sync memory size + * offset: DMA address offset + +* page_pool_put_page(): The outcome of this depends on the page refcnt. If the + driver bumps the refcnt > 1 this will unmap the page. If the page refcnt is 1 + the allocator owns the page and will try to recycle it in one of the pool + caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device + using dma_sync_single_range_for_device(). + +* page_pool_put_full_page(): Similar to page_pool_put_page(), but will DMA sync + for the entire memory area configured in area pool->max_len. + +* page_pool_recycle_direct(): Similar to page_pool_put_full_page() but caller + must guarantee safe context (e.g NAPI), since it will recycle the page + directly into the pool fast cache. + +* page_pool_release_page(): Unmap the page (if mapped) and account for it on + inflight counters. + +* page_pool_dev_alloc_pages(): Get a page from the page allocator or page_pool + caches. + +* page_pool_get_dma_addr(): Retrieve the stored DMA address. + +* page_pool_get_dma_dir(): Retrieve the stored DMA direction. + +Coding examples +=============== + +Registration +------------ + +.. code-block:: c + + /* Page pool registration */ + struct page_pool_params pp_params = { 0 }; + struct xdp_rxq_info xdp_rxq; + int err; + + pp_params.order = 0; + /* internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DESC_NUM; + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = priv->dev; + pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + page_pool = page_pool_create(&pp_params); + + err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0); + if (err) + goto err_out; + + err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool); + if (err) + goto err_out; + +NAPI poller +----------- + + +.. 
code-block:: c + + /* NAPI Rx poller */ + enum dma_data_direction dma_dir; + + dma_dir = page_pool_get_dma_dir(dring->page_pool); + while (done < budget) { + if (some error) + page_pool_recycle_direct(page_pool, page); + if (packet_is_xdp) { + if XDP_DROP: + page_pool_recycle_direct(page_pool, page); + } else (packet_is_skb) { + page_pool_release_page(page_pool, page); + new_page = page_pool_dev_alloc_pages(page_pool); + } + } + +Driver unload +------------- + +.. code-block:: c + + /* Driver unload */ + page_pool_put_full_page(page_pool, page, false); + xdp_rxq_info_unreg(&xdp_rxq); -- Gitee From 58e7a554b70b1fe17e7230665a527fc8d9f8fbcd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 23 Nov 2020 16:45:46 +0100 Subject: [PATCH 05/12] net: page_pool: Add page_pool_put_page_bulk() to page_pool.rst Introduce page_pool_put_page_bulk() entry into the API section of page_pool.rst Acked-by: Ilias Apalodimas Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/a6a5141b4d7b7b71fa7778b16b48f80095dd3233.1606146163.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: hongrongxuan --- Documentation/networking/page_pool.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 43088ddf95e4..a147591ce203 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -97,6 +97,14 @@ a page will cause no race conditions is enough. * page_pool_get_dma_dir(): Retrieve the stored DMA direction. +* page_pool_put_page_bulk(): Tries to refill a number of pages into the + ptr_ring cache holding ptr_ring producer lock. If the ptr_ring is full, + page_pool_put_page_bulk() will release leftover pages to the page allocator. + page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx + completion loop for the XDP_REDIRECT use case. + Please note the caller must not use data area after running + page_pool_put_page_bulk(), as this function overwrites it. + Coding examples =============== -- Gitee From 96e9e2d043157723606b8ae6e44ed4580652c7d8 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 1 Mar 2022 23:55:50 -0800 Subject: [PATCH 06/12] Documentation: update networking/page_pool.rst Add the new stats API, kernel config parameter, and stats structure information to the page_pool documentation. Signed-off-by: Joe Damato Signed-off-by: David S. Miller Signed-off-by: hongrongxuan --- Documentation/networking/page_pool.rst | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index a147591ce203..5db8c263b0c6 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -105,6 +105,47 @@ a page will cause no race conditions is enough. Please note the caller must not use data area after running page_pool_put_page_bulk(), as this function overwrites it. +* page_pool_get_stats(): Retrieve statistics about the page_pool. This API + is only available if the kernel has been configured with + ``CONFIG_PAGE_POOL_STATS=y``. A pointer to a caller allocated ``struct + page_pool_stats`` structure is passed to this API which is filled in. The + caller can then report those stats to the user (perhaps via ethtool, + debugfs, etc.). See below for an example usage of this API. 
+ +Stats API and structures +------------------------ +If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API +``page_pool_get_stats()`` and structures described below are available. It +takes a pointer to a ``struct page_pool`` and a pointer to a ``struct +page_pool_stats`` allocated by the caller. + +The API will fill in the provided ``struct page_pool_stats`` with +statistics about the page_pool. + +The stats structure has the following fields:: + + struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; + }; + + +The ``struct page_pool_alloc_stats`` has the following fields: + * ``fast``: successful fast path allocations + * ``slow``: slow path order-0 allocations + * ``slow_high_order``: slow path high order allocations + * ``empty``: ptr ring is empty, so a slow path allocation was forced. + * ``refill``: an allocation which triggered a refill of the cache + * ``waive``: pages obtained from the ptr ring that cannot be added to + the cache due to a NUMA mismatch. + +The ``struct page_pool_recycle_stats`` has the following fields: + * ``cached``: recycling placed page in the page pool cache + * ``cache_full``: page pool cache was full + * ``ring``: page placed into the ptr ring + * ``ring_full``: page released from page pool because the ptr ring was full + * ``released_refcnt``: page released (and not recycled) because refcnt > 1 + Coding examples =============== @@ -157,6 +198,21 @@ NAPI poller } } +Stats +----- + +.. code-block:: c + + #ifdef CONFIG_PAGE_POOL_STATS + /* retrieve stats */ + struct page_pool_stats stats = { 0 }; + if (page_pool_get_stats(page_pool, &stats)) { + /* perhaps the driver reports statistics with ethool */ + ethtool_print_allocation_stats(&stats.alloc_stats); + ethtool_print_recycle_stats(&stats.recycle_stats); + } + #endif + Driver unload ------------- -- Gitee From 5f852c1212e83d2f919d6f8cd9396535d95c0a01 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Apr 2022 16:05:26 +0200 Subject: [PATCH 07/12] page_pool: Add recycle stats to page_pool_put_page_bulk Add missing recycle stats to page_pool_put_page_bulk routine. 
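The ring and ring_full counters updated here are driven by drivers that return pages in batches, typically from an XDP_REDIRECT Tx completion loop. As a rough illustration (not taken from this series), such a loop might look like the sketch below; my_tx_ring, tx_desc_completed() and tx_desc_page() are hypothetical placeholders, and in practice drivers usually go through xdp_return_frame_bulk(), which calls page_pool_put_page_bulk() internally and handles frames that may come from different pools.

/* Hypothetical Tx-completion sketch (placeholders, not a real driver):
 * batch pages known to come from a single page_pool and return them with
 * one call, so the ptr_ring producer lock is taken once per batch.  Pages
 * accepted by the ring are counted in "ring"; pages that do not fit are
 * counted in "ring_full" and released back to the page allocator.
 */
#include <net/page_pool.h>

#define MY_BULK_MAX	16

struct my_tx_ring {
	struct page_pool *page_pool;
	/* descriptor bookkeeping omitted */
};

/* Placeholder descriptor helpers, assumed to be provided by the driver. */
bool tx_desc_completed(struct my_tx_ring *ring);
struct page *tx_desc_page(struct my_tx_ring *ring);

static void my_xdp_tx_clean(struct my_tx_ring *ring, int budget)
{
	void *bulk[MY_BULK_MAX];
	int n = 0;

	while (budget-- && tx_desc_completed(ring)) {
		/* Every page on this ring is assumed to belong to
		 * ring->page_pool; frames from foreign pools should go
		 * through xdp_return_frame_bulk() instead.
		 */
		bulk[n++] = tx_desc_page(ring);

		if (n == MY_BULK_MAX) {
			page_pool_put_page_bulk(ring->page_pool, bulk, n);
			n = 0;
		}
	}

	if (n)
		page_pool_put_page_bulk(ring->page_pool, bulk, n);
}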
Reviewed-by: Joe Damato Signed-off-by: Lorenzo Bianconi Reviewed-by: Ilias Apalodimas Link: https://lore.kernel.org/r/3712178b51c007cfaed910ea80e68f00c916b1fa.1649685634.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: hongrongxuan --- net/core/page_pool.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3a7e23a7e6de..f9132b60c5ae 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -36,6 +36,12 @@ this_cpu_inc(s->__stat); \ } while (0) +#define recycle_stat_add(pool, __stat, val) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ + this_cpu_add(s->__stat, val); \ + } while (0) + bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats) { @@ -63,6 +69,7 @@ EXPORT_SYMBOL(page_pool_get_stats); #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) +#define recycle_stat_add(pool, __stat, val) #endif static int page_pool_init(struct page_pool *pool, @@ -565,9 +572,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ page_pool_ring_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) - break; /* ring full */ + if (__ptr_ring_produce(&pool->ring, data[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } } + recycle_stat_add(pool, ring, i); page_pool_ring_unlock(pool); /* Hopefully all pages was return into ptr_ring */ -- Gitee From f27d00fcb716ab42dd475eb5ace506a9e49e5825 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 12 Apr 2022 18:31:58 +0200 Subject: [PATCH 08/12] net: page_pool: introduce ethtool stats Introduce page_pool APIs to report stats through ethtool and reduce duplicated code in each driver. Signed-off-by: Lorenzo Bianconi Reviewed-by: Jakub Kicinski Reviewed-by: Ilias Apalodimas Signed-off-by: David S. 
Miller Signed-off-by: hongrongxuan --- include/net/page_pool.h | 21 ++++++++++++++ net/core/page_pool.c | 63 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ee375861c6a6..c93d6271207d 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -115,6 +115,10 @@ struct page_pool_stats { struct page_pool_recycle_stats recycle_stats; }; +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + /* * Drivers that wish to harvest page pool stats and report them to users * (perhaps via ethtool, debugfs, or another mechanism) can allocate a @@ -122,6 +126,23 @@ struct page_pool_stats { */ bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats); +#else + +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} + #endif struct page_pool { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f9132b60c5ae..a17eb7ea1ea6 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -18,6 +18,7 @@ #include #include /* for __put_page() */ #include +#include #include @@ -42,6 +43,20 @@ this_cpu_add(s->__stat, val); \ } while (0) +static const char pp_stats[][ETH_GSTRING_LEN] = { + "rx_pp_alloc_fast", + "rx_pp_alloc_slow", + "rx_pp_alloc_slow_ho", + "rx_pp_alloc_empty", + "rx_pp_alloc_refill", + "rx_pp_alloc_waive", + "rx_pp_recycle_cached", + "rx_pp_recycle_cache_full", + "rx_pp_recycle_ring", + "rx_pp_recycle_ring_full", + "rx_pp_recycle_released_ref", +}; + bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats) { @@ -50,7 +65,13 @@ bool page_pool_get_stats(struct page_pool *pool, if (!stats) return false; - memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats)); + /* The caller is responsible to initialize stats. 
*/ + stats->alloc_stats.fast += pool->alloc_stats.fast; + stats->alloc_stats.slow += pool->alloc_stats.slow; + stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order; + stats->alloc_stats.empty += pool->alloc_stats.empty; + stats->alloc_stats.refill += pool->alloc_stats.refill; + stats->alloc_stats.waive += pool->alloc_stats.waive; for_each_possible_cpu(cpu) { const struct page_pool_recycle_stats *pcpu = @@ -66,6 +87,46 @@ bool page_pool_get_stats(struct page_pool *pool, return true; } EXPORT_SYMBOL(page_pool_get_stats); + +u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { + memcpy(data, pp_stats[i], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); + +int page_pool_ethtool_stats_get_count(void) +{ + return ARRAY_SIZE(pp_stats); +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); + +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + struct page_pool_stats *pool_stats = stats; + + *data++ = pool_stats->alloc_stats.fast; + *data++ = pool_stats->alloc_stats.slow; + *data++ = pool_stats->alloc_stats.slow_high_order; + *data++ = pool_stats->alloc_stats.empty; + *data++ = pool_stats->alloc_stats.refill; + *data++ = pool_stats->alloc_stats.waive; + *data++ = pool_stats->recycle_stats.cached; + *data++ = pool_stats->recycle_stats.cache_full; + *data++ = pool_stats->recycle_stats.ring; + *data++ = pool_stats->recycle_stats.ring_full; + *data++ = pool_stats->recycle_stats.released_refcnt; + + return data; +} +EXPORT_SYMBOL(page_pool_ethtool_stats_get); + #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) -- Gitee From 5b5713eee4a8a42f962dd6221a33abc271853cef Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Thu, 12 May 2022 14:56:31 +0800 Subject: [PATCH 09/12] net: page_pool: add page allocation stats for two fast page allocate path Currently If use page pool allocation stats to analysis a RX performance degradation problem. These stats only count for pages allocate from page_pool_alloc_pages. But nic drivers such as hns3 use page_pool_dev_alloc_frag to allocate pages, so page stats in this API should also be counted. Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. 
Miller Signed-off-by: hongrongxuan --- net/core/page_pool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a17eb7ea1ea6..00d6406c3546 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -705,8 +705,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, if (page && *offset + size > max_size) { page = page_pool_drain_frag(pool, page); - if (page) + if (page) { + alloc_stat_inc(pool, fast); goto frag_reset; + } } if (!page) { @@ -728,6 +730,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, pool->frag_users++; pool->frag_offset = *offset + size; + alloc_stat_inc(pool, fast); return page; } EXPORT_SYMBOL(page_pool_alloc_frag); -- Gitee From 1b68b71cb5402dc854f444aabaa4e66b20bc5134 Mon Sep 17 00:00:00 2001 From: hongrongxuan Date: Thu, 23 May 2024 04:18:11 -0400 Subject: [PATCH 10/12] tencent.config: arm64: default select CONFIG_PAGE_POOL_STATS --- arch/arm64/configs/tencent.config | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 5d120d0c8b21..4c00e34e77c3 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1455,3 +1455,4 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_KSTRTOX=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_PAGE_POOL_STATS=y -- Gitee From 53be21a94be266175bfb01710085fa11fd8fc619 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 28 Jun 2021 19:40:11 -0700 Subject: [PATCH 11/12] mm/page_alloc: add an alloc_pages_bulk_array_node() helper Patch series "vmalloc() vs bulk allocator", v2. This patch (of 3): Add a "node" variant of the alloc_pages_bulk_array() function. The helper guarantees that a __alloc_pages_bulk() is invoked with a valid NUMA node ID. Link: https://lkml.kernel.org/r/20210516202056.2120-1-urezki@gmail.com Link: https://lkml.kernel.org/r/20210516202056.2120-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Mel Gorman Cc: Mel Gorman Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Hillf Danton Cc: Michal Hocko Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: hongrongxuan --- include/linux/gfp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 467545cae440..4564ebb29185 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -514,6 +514,15 @@ alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_arr return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); } +static inline unsigned long +alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) +{ + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); + + return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). -- Gitee From 2ab60ed82d295b7ad2c2bddc466c575a8ae91a45 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 5 Jul 2022 19:35:15 +0800 Subject: [PATCH 12/12] net: page_pool: optimize page pool page allocation in NUMA scenario Currently NIC packet receiving performance based on page pool deteriorates occasionally. To analysis the causes of this problem page allocation stats are collected. 
Here are the stats when NIC rx performance deteriorates:

bandwidth(Gbits/s)          16.8       6.91
rx_pp_alloc_fast            13794308   21141869
rx_pp_alloc_slow            108625     166481
rx_pp_alloc_slow_h          0          0
rx_pp_alloc_empty           8192       8192
rx_pp_alloc_refill          0          0
rx_pp_alloc_waive           100433     158289
rx_pp_recycle_cached        0          0
rx_pp_recycle_cache_full    0          0
rx_pp_recycle_ring          362400     420281
rx_pp_recycle_ring_full     6064893    9709724
rx_pp_recycle_released_ref  0          0

The rx_pp_alloc_waive count indicates that a large number of pages have a
NUMA node inconsistent with the NIC device's NUMA node, so these pages
cannot be reused by the page pool. As a result, many new pages are
allocated by __page_pool_alloc_pages_slow, which is time consuming. This
causes the NIC rx performance fluctuations.

The main reason for the large number of NUMA-mismatched pages in the page
pool is that the page pool uses alloc_pages_bulk_array to allocate the
original pages. This function is not suitable for page allocation in a
NUMA scenario. So this patch uses alloc_pages_bulk_array_node, which has a
NUMA node id input parameter, to ensure NUMA consistency between the NIC
device and the allocated pages.

Repeated NIC rx performance tests were performed 40 times. NIC rx
bandwidth is higher and more stable compared to the data above. Here are
three test stats; the rx_pp_alloc_waive count is zero and
rx_pp_alloc_slow, which indicates pages allocated from the slow path, is
relatively low.

bandwidth(Gbits/s)          93        93.9      93.8
rx_pp_alloc_fast            60066264  61266386  60938254
rx_pp_alloc_slow            16512     16517     16539
rx_pp_alloc_slow_ho         0         0         0
rx_pp_alloc_empty           16512     16517     16539
rx_pp_alloc_refill          473841    481910    481585
rx_pp_alloc_waive           0         0         0
rx_pp_recycle_cached        0         0         0
rx_pp_recycle_cache_full    0         0         0
rx_pp_recycle_ring          29754145  30358243  30194023
rx_pp_recycle_ring_full     0         0         0
rx_pp_recycle_released_ref  0         0         0

Signed-off-by: Jie Wang
Acked-by: Jesper Dangaard Brouer
Acked-by: Ilias Apalodimas
Link: https://lore.kernel.org/r/20220705113515.54342-1-huangguangbin2@huawei.com
Signed-off-by: Jakub Kicinski
Signed-off-by: hongrongxuan
---
 net/core/page_pool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 00d6406c3546..1983035f5ca1 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -387,7 +387,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
 	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
 
-	nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
+	nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
+					       pool->alloc.cache);
 	if (unlikely(!nr_pages))
 		return NULL;
 
--
Gitee
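The series adds the ethtool helpers page_pool_ethtool_stats_get_count(), page_pool_ethtool_stats_get_strings() and page_pool_ethtool_stats_get(), but no driver-side consumer is shown above. The sketch below is one way a driver could wire them up; it is a minimal sketch assuming a hypothetical struct my_netdev_priv that owns one page_pool per RX queue, and the priv layout, queue count and function names are illustrative only. Because page_pool_get_stats() accumulates into the caller-provided structure, calling it once per queue yields device-wide totals.

/* Hypothetical ethtool glue: expose the per-pool counters added in this
 * series through the standard ethtool -S interface.  Only the page_pool
 * helpers are real; my_netdev_priv and its rxq layout are made up.
 */
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <net/page_pool.h>

struct my_rx_queue {
	struct page_pool *page_pool;
};

struct my_netdev_priv {
	struct my_rx_queue *rxq;
	unsigned int num_rxqs;
};

static int my_get_sset_count(struct net_device *dev, int sset)
{
	return sset == ETH_SS_STATS ? page_pool_ethtool_stats_get_count() :
				      -EOPNOTSUPP;
}

static void my_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	if (sset == ETH_SS_STATS)
		page_pool_ethtool_stats_get_strings(data);
}

static void my_get_ethtool_stats(struct net_device *dev,
				 struct ethtool_stats *e, u64 *data)
{
	struct my_netdev_priv *priv = netdev_priv(dev);
	struct page_pool_stats stats = { };	/* caller must zero-init */
	unsigned int i;

	/* page_pool_get_stats() adds to 'stats', so this sums all queues */
	for (i = 0; i < priv->num_rxqs; i++)
		page_pool_get_stats(priv->rxq[i].page_pool, &stats);

	page_pool_ethtool_stats_get(data, &stats);
}

static const struct ethtool_ops my_ethtool_ops = {
	.get_sset_count		= my_get_sset_count,
	.get_strings		= my_get_strings,
	.get_ethtool_stats	= my_get_ethtool_stats,
};

With CONFIG_PAGE_POOL_STATS disabled, the helpers fall back to the inline stubs added by the "net: page_pool: introduce ethtool stats" patch, so the same driver code compiles either way and simply reports no extra counters.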