/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/page_pool.h>
#include <net/xdp.h>

#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h>
#include <linux/poison.h>

#include <trace/events/page_pool.h>

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

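/* Descriptive note (added, not in the original comments): the frag allocator
 * below pre-biases a page's pp_frag_count to BIAS_MAX in
 * page_pool_alloc_frag(), and the drain paths later subtract
 * (BIAS_MAX - frag_users), so handing out individual frags needs no atomic
 * operation on the page.
 */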
#define BIAS_MAX	LONG_MAX

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
	 * which is the XDP_TX use-case.
	 */
	if (pool->p.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;
	}

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		if (!pool->p.max_len)
			return -EINVAL;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
	    pool->p.flags & PP_FLAG_PAGE_FRAG)
		return -EINVAL;

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
		return -ENOMEM;

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* Driver calling page_pool_create() must also call page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		get_device(pool->p.dev);

	return 0;
}

struct page_pool *page_pool_create(const struct page_pool_params *params)
{
	struct page_pool *pool;
	int err;

	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	err = page_pool_init(pool, params);
	if (err < 0) {
		pr_warn("%s() gave up with errno %d\n", __func__, err);
		kfree(pool);
		return ERR_PTR(err);
	}

	return pool;
}
EXPORT_SYMBOL(page_pool_create);
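
/* Illustrative sketch (not from this file): a driver typically creates one
 * pool per RX queue, roughly along these lines. The name "rxq" and the
 * chosen pool_size are hypothetical.
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP,
 *		.order		= 0,
 *		.pool_size	= 256,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= &pdev->dev,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *	};
 *
 *	rxq->page_pool = page_pool_create(&pp_params);
 *	if (IS_ERR(rxq->page_pool))
 *		return PTR_ERR(rxq->page_pool);
 */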

static void page_pool_return_page(struct page_pool *pool, struct page *page);

noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r))
		return NULL;

	/* Softirq guarantees CPU and thus NUMA node is stable. This
	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Slower-path: Get pages from locked ring queue */
	spin_lock(&r->consumer_lock);

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch;
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fall through to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0))
		page = pool->alloc.cache[--pool->alloc.count];

	spin_unlock(&r->consumer_lock);
	return page;
}

/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
	struct page *page;

	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
	if (likely(pool->alloc.count)) {
		/* Fast-path */
		page = pool->alloc.cache[--pool->alloc.count];
	} else {
		page = page_pool_refill_alloc_cache(pool);
	}

	return page;
}

static void page_pool_dma_sync_for_device(struct page_pool *pool,
					  struct page *page,
					  unsigned int dma_sync_size)
{
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	dma_sync_size = min(dma_sync_size, pool->p.max_len);
	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
					 pool->p.offset, dma_sync_size,
					 pool->p.dma_dir);
}

static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bit and does not always fit
	 * into page private data (i.e. 32bit cpu with 64bit DMA caps).
	 * This mapping is kept for lifetime of page, until leaving pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(pool->p.dev, dma))
		return false;

	page_pool_set_dma_addr(page, dma);

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

	return true;
}

static void page_pool_set_pp_info(struct page_pool *pool,
				  struct page *page)
{
	page->pp = pool;
	page->pp_magic |= PP_SIGNATURE;
}

static void page_pool_clear_pp_info(struct page *page)
{
	page->pp_magic = 0;
	page->pp = NULL;
}

static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
						 gfp_t gfp)
{
	struct page *page;

	gfp |= __GFP_COMP;
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
	if (unlikely(!page))
		return NULL;

	if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
	    unlikely(!page_pool_dma_map(pool, page))) {
		put_page(page);
		return NULL;
	}

	page_pool_set_pp_info(pool, page);

	/* Track how many pages are held 'in-flight' */
	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
	return page;
}

/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t gfp)
{
	const int bulk = PP_ALLOC_CACHE_REFILL;
	unsigned int pp_flags = pool->p.flags;
	unsigned int pp_order = pool->p.order;
	struct page *page;
	int i, nr_pages;

	/* Don't support bulk alloc for high-order pages */
	if (unlikely(pp_order))
		return __page_pool_alloc_page_order(pool, gfp);

	/* Unnecessary as alloc cache is empty, but guarantees zero count */
	if (unlikely(pool->alloc.count > 0))
		return pool->alloc.cache[--pool->alloc.count];

	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);

	nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
	if (unlikely(!nr_pages))
		return NULL;

	/* Pages have been filled into alloc.cache array, but count is zero
	 * and page elements have not been (possibly) DMA mapped.
	 */
	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
		if ((pp_flags & PP_FLAG_DMA_MAP) &&
		    unlikely(!page_pool_dma_map(pool, page))) {
			put_page(page);
			continue;
		}

		page_pool_set_pp_info(pool, page);
		pool->alloc.cache[pool->alloc.count++] = page;
		/* Track how many pages are held 'in-flight' */
		pool->pages_state_hold_cnt++;
		trace_page_pool_state_hold(pool, page,
					   pool->pages_state_hold_cnt);
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0))
		page = pool->alloc.cache[--pool->alloc.count];
	else
		page = NULL;

	/* When the page is just allocated it should/must have refcnt 1. */
	return page;
}

/* Use page_pool_alloc_pages() in place of alloc_pages() API calls; it
 * additionally provides the synchronization guarantee for the allocation
 * side.
 */
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
	struct page *page;

	/* Fast-path: Get a page from cache */
	page = __page_pool_get_cached(pool);
	if (page)
		return page;

	/* Slow-path: cache empty, do real allocation */
	page = __page_pool_alloc_pages_slow(pool, gfp);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
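
/* Illustrative sketch (not from this file): pages obtained here must be
 * handed back through the page_pool_put_page() family, not plain put_page(),
 * so the pool can recycle them. "pool" below is a hypothetical,
 * already-created pool.
 *
 *	struct page *page = page_pool_alloc_pages(pool, GFP_ATOMIC);
 *
 *	if (page) {
 *		... fill RX descriptor from the page ...
 *		page_pool_put_full_page(pool, page, false);
 *	}
 */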

/* Calculate distance between two u32 values, valid if distance is below
 * 2^(31), ref: https://en.wikipedia.org/wiki/Serial_number_arithmetic
 */
#define _distance(a, b)	(s32)((a) - (b))
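
/* Worked example of the wrap-safe arithmetic (added for illustration): with
 * release_cnt = 0xfffffffe and hold_cnt = 0x00000003 (the hold counter has
 * wrapped), the u32 subtraction 3 - 0xfffffffe wraps around to 5, so
 * _distance() still correctly reports 5 pages inflight.
 */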

static s32 page_pool_inflight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 inflight;

	inflight = _distance(hold_cnt, release_cnt);

	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);

	return inflight;
}

/* Disconnects a page (from a page_pool).  API users can have a need
 * to disconnect a page (from a page_pool), to allow it to be used as
 * a regular page (that will eventually be returned to the normal
 * page-allocator via put_page).
 */
void page_pool_release_page(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;
	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		goto skip_dma_unmap;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC);
	page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);
}
EXPORT_SYMBOL(page_pool_release_page);

/* Return a page to the page allocator, cleaning up our state */
static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	page_pool_release_page(pool, page);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}

static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
	int ret;
	/* BH protection not needed if current is serving softirq */
	if (in_serving_softirq())
		ret = ptr_ring_produce(&pool->ring, page);
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	return (ret == 0) ? true : false;
}

/* Only allow direct recycling in special circumstances, into the
 * alloc side cache.  E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 */
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
		return false;

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	return true;
}

/* If the page refcnt == 1, this will try to recycle the page.
 * If PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->p.max_len).
 * If the page refcnt != 1, then the page will be returned to the memory
 * subsystem.
 */
static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
		     unsigned int dma_sync_size, bool allow_direct)
{
	/* It is not the last user for the page frag case */
	if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
	    page_pool_atomic_sub_frag_count_return(page, 1))
		return NULL;

	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but has fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns page, and can recycle it.
	 *
	 * page is NOT reusable when allocated when system is under
	 * some pressure. (page_is_pfmemalloc)
	 */
	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

		if (allow_direct && in_serving_softirq() &&
		    page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
		return page;
	}

	/* Fallback/non-XDP mode: API user has elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case page_pool maintains the DMA mapping, API user must
	 * call page_pool_put_page once.  In this elevated refcnt
	 * case, the DMA is unmapped/released, as driver is likely
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	/* Do not replace this with page_pool_return_page() */
	page_pool_release_page(pool, page);
	put_page(page);

	return NULL;
}

void page_pool_put_page(struct page_pool *pool, struct page *page,
			unsigned int dma_sync_size, bool allow_direct)
{
	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Cache full, fallback to free pages */
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_put_page);
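
/* Illustrative sketch (not from this file): for the XDP_DROP fast path a
 * driver can recycle straight into the alloc-side cache through the
 * page_pool_recycle_direct() wrapper, which passes allow_direct = true:
 *
 *	case XDP_DROP:
 *		page_pool_recycle_direct(rxq->page_pool, page);
 *		break;
 *
 * "rxq" is hypothetical; this is only safe from the pool's NAPI/softirq
 * context, per page_pool_recycle_in_cache() above.
 */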

/* Caller must not use data area after call, as this function overwrites it */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
{
	int i, bulk_len = 0;

	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		page = __page_pool_put_page(pool, page, -1, false);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

	if (unlikely(!bulk_len))
		return;

	/* Bulk producer into ptr_ring page_pool cache */
	page_pool_ring_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i]))
			break; /* ring full */
	}
	page_pool_ring_unlock(pool);

	/* Hopefully all pages were returned into ptr_ring */
	if (likely(i == bulk_len))
		return;

	/* ptr_ring cache full, free remaining pages outside producer lock
	 * since put_page() with refcnt == 1 can be an expensive operation
	 */
	for (; i < bulk_len; i++)
		page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
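
/* Context note (added for illustration): the bulk API above is what the XDP
 * core uses when freeing many frames at once, e.g. xdp_return_frame_bulk()
 * batches pages belonging to the same pool and then hands the collected
 * array to page_pool_put_page_bulk().
 */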

static struct page *page_pool_drain_frag(struct page_pool *pool,
					 struct page *page)
{
	long drain_count = BIAS_MAX - pool->frag_users;

	/* Some user is still using the page frag */
	if (likely(page_pool_atomic_sub_frag_count_return(page,
							  drain_count)))
		return NULL;

	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page, -1);

		return page;
	}

	page_pool_return_page(pool, page);
	return NULL;
}

static void page_pool_free_frag(struct page_pool *pool)
{
	long drain_count = BIAS_MAX - pool->frag_users;
	struct page *page = pool->frag_page;

	pool->frag_page = NULL;

	if (!page ||
	    page_pool_atomic_sub_frag_count_return(page, drain_count))
		return;

	page_pool_return_page(pool, page);
}

struct page *page_pool_alloc_frag(struct page_pool *pool,
				  unsigned int *offset,
				  unsigned int size, gfp_t gfp)
{
	unsigned int max_size = PAGE_SIZE << pool->p.order;
	struct page *page = pool->frag_page;

	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
		    size > max_size))
		return NULL;

	size = ALIGN(size, dma_get_cache_alignment());
	*offset = pool->frag_offset;

	if (page && *offset + size > max_size) {
		page = page_pool_drain_frag(pool, page);
		if (page)
			goto frag_reset;
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
		if (unlikely(!page)) {
			pool->frag_page = NULL;
			return NULL;
		}

		pool->frag_page = page;

frag_reset:
		pool->frag_users = 1;
		*offset = 0;
		pool->frag_offset = size;
		page_pool_set_frag_count(page, BIAS_MAX);
		return page;
	}

	pool->frag_users++;
	pool->frag_offset = *offset + size;
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
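
/* Illustrative sketch (not from this file): sub-page RX buffers via the frag
 * API. The pool must have been created with PP_FLAG_PAGE_FRAG; "rxq", "buf"
 * and the 2048-byte size are hypothetical.
 *
 *	unsigned int offset;
 *	struct page *page;
 *
 *	page = page_pool_alloc_frag(rxq->page_pool, &offset, 2048,
 *				    GFP_ATOMIC);
 *	if (page)
 *		buf->addr = page_address(page) + offset;
 *
 * Each frag is later released with the page_pool_put_page() family; the
 * pp_frag_count bias ensures only the last user actually recycles the page.
 */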

static void page_pool_empty_ring(struct page_pool *pool)
{
	struct page *page;

	/* Empty recycle ring */
	while ((page = ptr_ring_consume_bh(&pool->ring))) {
		/* Verify the refcnt invariant of cached pages */
		if (!(page_ref_count(page) == 1))
			pr_crit("%s() page_pool refcnt %d violation\n",
				__func__, page_ref_count(page));

		page_pool_return_page(pool, page);
	}
}

static void page_pool_free(struct page_pool *pool)
{
	if (pool->disconnect)
		pool->disconnect(pool);

	ptr_ring_cleanup(&pool->ring, NULL);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

	kfree(pool);
}

static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
	struct page *page;

	if (pool->destroy_cnt)
		return;

	/* Empty alloc cache, assume caller made sure this is
	 * no longer in use, and page_pool_alloc_pages() cannot be
	 * called concurrently.
	 */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}

static void page_pool_scrub(struct page_pool *pool)
{
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}

static int page_pool_release(struct page_pool *pool)
{
	int inflight;

	page_pool_scrub(pool);
	inflight = page_pool_inflight(pool);
	if (!inflight)
		page_pool_free(pool);

	return inflight;
}

static void page_pool_release_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

	/* Periodic warning */
	if (time_after_eq(jiffies, pool->defer_warn)) {
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
			__func__, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}

void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
}

void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	if (!page_pool_put(pool))
		return;

	page_pool_free_frag(pool);

	if (!page_pool_release(pool))
		return;

	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
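
/* Shutdown note (added for illustration): after page_pool_destroy() the
 * driver must no longer allocate from the pool. Pages still in-flight in the
 * stack keep the pool structure alive, and page_pool_release_retry() above
 * re-checks every DEFER_TIME (warning every DEFER_WARN_INTERVAL) until the
 * inflight count reaches zero and the pool can actually be freed.
 */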

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
	struct page *page;

	trace_page_pool_update_nid(pool, new_nid);
	pool->p.nid = new_nid;

	/* Flush pool alloc cache, as refill will check NUMA node */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_update_nid);

bool page_pool_return_skb_page(struct page *page)
{
	struct page_pool *pp;

	page = compound_head(page);

	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
	 * in order to preserve any existing bits, such as bit 0 for the
	 * head page of compound page and bit 1 for pfmemalloc page, so
	 * mask those bits for freeing side when doing below checking,
	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
	 * to avoid recycling the pfmemalloc page.
	 */
	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
		return false;

	pp = page->pp;

	/* Driver set this to memory recycling info. Reset it on recycle.
	 * This will *not* work for NIC using a split-page memory model.
	 * The page will be returned to the pool here regardless of the
	 * 'flipped' fragment being in use or not.
	 */
	page_pool_put_full_page(pp, page, false);

	return true;
}
EXPORT_SYMBOL(page_pool_return_skb_page);