/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/page_pool.h>
#include <net/xdp.h>

#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h>
#include <linux/poison.h>

#include <trace/events/page_pool.h>

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

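/* Descriptive note (added, not in the original comments): the frag allocator
 * below pre-biases a page's pp_frag_count to BIAS_MAX in
 * page_pool_alloc_frag(), and the drain paths later subtract
 * (BIAS_MAX - frag_users), so handing out individual frags needs no atomic
 * operation on the page.
 */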
#define BIAS_MAX	LONG_MAX

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
	 * which is the XDP_TX use-case.
	 */
	if (pool->p.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;
	}

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		if (!pool->p.max_len)
			return -EINVAL;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
	    pool->p.flags & PP_FLAG_PAGE_FRAG)
		return -EINVAL;

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
		return -ENOMEM;

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* Driver calling page_pool_create() must also call page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		get_device(pool->p.dev);

	return 0;
}

struct page_pool *page_pool_create(const struct page_pool_params *params)
{
	struct page_pool *pool;
	int err;

	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	err = page_pool_init(pool, params);
	if (err < 0) {
		pr_warn("%s() gave up with errno %d\n", __func__, err);
		kfree(pool);
		return ERR_PTR(err);
	}

	return pool;
}
EXPORT_SYMBOL(page_pool_create);
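
/* Illustrative sketch (not from this file): a driver typically creates one
 * pool per RX queue, roughly along these lines. The name "rxq" and the
 * chosen pool_size are hypothetical.
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP,
 *		.order		= 0,
 *		.pool_size	= 256,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= &pdev->dev,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *	};
 *
 *	rxq->page_pool = page_pool_create(&pp_params);
 *	if (IS_ERR(rxq->page_pool))
 *		return PTR_ERR(rxq->page_pool);
 */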

static void page_pool_return_page(struct page_pool *pool, struct page *page);

noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r))
		return NULL;

	/* Softirq guarantees CPU and thus NUMA node is stable. This
	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Slower-path: Get pages from locked ring queue */
	spin_lock(&r->consumer_lock);

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch;
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fall through to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0))
		page = pool->alloc.cache[--pool->alloc.count];

	spin_unlock(&r->consumer_lock);
	return page;
}

/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
	struct page *page;

	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
	if (likely(pool->alloc.count)) {
		/* Fast-path */
		page = pool->alloc.cache[--pool->alloc.count];
	} else {
		page = page_pool_refill_alloc_cache(pool);
	}

	return page;
}

static void page_pool_dma_sync_for_device(struct page_pool *pool,
					  struct page *page,
					  unsigned int dma_sync_size)
{
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	dma_sync_size = min(dma_sync_size, pool->p.max_len);
	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
					 pool->p.offset, dma_sync_size,
					 pool->p.dma_dir);
}

static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bit and does not always fit
	 * into page private data (i.e. 32bit cpu with 64bit DMA caps).
	 * This mapping is kept for lifetime of page, until leaving pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(pool->p.dev, dma))
		return false;

	page_pool_set_dma_addr(page, dma);

	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

	return true;
}

static void page_pool_set_pp_info(struct page_pool *pool,
				  struct page *page)
{
	page->pp = pool;
	page->pp_magic |= PP_SIGNATURE;
}

static void page_pool_clear_pp_info(struct page *page)
{
	page->pp_magic = 0;
	page->pp = NULL;
}

static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
						 gfp_t gfp)
{
	struct page *page;

	gfp |= __GFP_COMP;
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
	if (unlikely(!page))
		return NULL;

	if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
	    unlikely(!page_pool_dma_map(pool, page))) {
		put_page(page);
		return NULL;
	}

	page_pool_set_pp_info(pool, page);

	/* Track how many pages are held 'in-flight' */
	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
	return page;
}

/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t gfp)
{
	const int bulk = PP_ALLOC_CACHE_REFILL;
	unsigned int pp_flags = pool->p.flags;
	unsigned int pp_order = pool->p.order;
	struct page *page;
	int i, nr_pages;

	/* Don't support bulk alloc for high-order pages */
	if (unlikely(pp_order))
		return __page_pool_alloc_page_order(pool, gfp);

	/* Unnecessary as alloc cache is empty, but guarantees zero count */
	if (unlikely(pool->alloc.count > 0))
		return pool->alloc.cache[--pool->alloc.count];

	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);

	nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
	if (unlikely(!nr_pages))
		return NULL;

	/* Pages have been filled into alloc.cache array, but count is zero
	 * and page elements have not been (possibly) DMA mapped.
	 */
	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
		if ((pp_flags & PP_FLAG_DMA_MAP) &&
		    unlikely(!page_pool_dma_map(pool, page))) {
			put_page(page);
			continue;
		}

		page_pool_set_pp_info(pool, page);
		pool->alloc.cache[pool->alloc.count++] = page;
		/* Track how many pages are held 'in-flight' */
		pool->pages_state_hold_cnt++;
		trace_page_pool_state_hold(pool, page,
					   pool->pages_state_hold_cnt);
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0))
		page = pool->alloc.cache[--pool->alloc.count];
	else
		page = NULL;

	/* When the page is just allocated it should/must have refcnt 1. */
	return page;
}

/* Use page_pool_alloc_pages() in place of alloc_pages() API calls; it
 * additionally provides the synchronization guarantee for the allocation
 * side.
 */
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
	struct page *page;

	/* Fast-path: Get a page from cache */
	page = __page_pool_get_cached(pool);
	if (page)
		return page;

	/* Slow-path: cache empty, do real allocation */
	page = __page_pool_alloc_pages_slow(pool, gfp);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
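
/* Illustrative sketch (not from this file): pages obtained here must be
 * handed back through the page_pool_put_page() family, not plain put_page(),
 * so the pool can recycle them. "pool" below is a hypothetical,
 * already-created pool.
 *
 *	struct page *page = page_pool_alloc_pages(pool, GFP_ATOMIC);
 *
 *	if (page) {
 *		... fill RX descriptor from the page ...
 *		page_pool_put_full_page(pool, page, false);
 *	}
 */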

/* Calculate distance between two u32 values, valid if distance is below
 * 2^(31), ref: https://en.wikipedia.org/wiki/Serial_number_arithmetic
 */
#define _distance(a, b)	(s32)((a) - (b))
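
/* Worked example of the wrap-safe arithmetic (added for illustration): with
 * release_cnt = 0xfffffffe and hold_cnt = 0x00000003 (the hold counter has
 * wrapped), the u32 subtraction 3 - 0xfffffffe wraps around to 5, so
 * _distance() still correctly reports 5 pages inflight.
 */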

static s32 page_pool_inflight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 inflight;

	inflight = _distance(hold_cnt, release_cnt);

	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);

	return inflight;
}

/* Disconnects a page (from a page_pool).  API users can have a need
 * to disconnect a page (from a page_pool), to allow it to be used as
 * a regular page (that will eventually be returned to the normal
 * page-allocator via put_page).
 */
void page_pool_release_page(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;
	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		goto skip_dma_unmap;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC);
	page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);
}
EXPORT_SYMBOL(page_pool_release_page);

/* Return a page to the page allocator, cleaning up our state */
static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	page_pool_release_page(pool, page);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}

static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
	int ret;
	/* BH protection not needed if current is serving softirq */
	if (in_serving_softirq())
		ret = ptr_ring_produce(&pool->ring, page);
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	return (ret == 0) ? true : false;
}

/* Only allow direct recycling in special circumstances, into the
 * alloc side cache.  E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 */
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
		return false;

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	return true;
}

/* If the page refcnt == 1, this will try to recycle the page.
 * If PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->p.max_len).
 * If the page refcnt != 1, then the page will be returned to the memory
 * subsystem.
 */
static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
		     unsigned int dma_sync_size, bool allow_direct)
{
	/* It is not the last user for the page frag case */
	if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
	    page_pool_atomic_sub_frag_count_return(page, 1))
		return NULL;

	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but has fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns page, and can recycle it.
	 *
	 * page is NOT reusable when allocated when system is under
	 * some pressure. (page_is_pfmemalloc)
	 */
	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

		if (allow_direct && in_serving_softirq() &&
		    page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
		return page;
	}

	/* Fallback/non-XDP mode: API user has elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case page_pool maintains the DMA mapping, API user must
	 * call page_pool_put_page once.  In this elevated refcnt
	 * case, the DMA is unmapped/released, as driver is likely
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	/* Do not replace this with page_pool_return_page() */
	page_pool_release_page(pool, page);
	put_page(page);

	return NULL;
}

void page_pool_put_page(struct page_pool *pool, struct page *page,
			unsigned int dma_sync_size, bool allow_direct)
{
	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Cache full, fallback to free pages */
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_put_page);
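
/* Illustrative sketch (not from this file): for the XDP_DROP fast path a
 * driver can recycle straight into the alloc-side cache through the
 * page_pool_recycle_direct() wrapper, which passes allow_direct = true:
 *
 *	case XDP_DROP:
 *		page_pool_recycle_direct(rxq->page_pool, page);
 *		break;
 *
 * "rxq" is hypothetical; this is only safe from the pool's NAPI/softirq
 * context, per page_pool_recycle_in_cache() above.
 */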

/* Caller must not use data area after call, as this function overwrites it */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
{
	int i, bulk_len = 0;

	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		page = __page_pool_put_page(pool, page, -1, false);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

	if (unlikely(!bulk_len))
		return;

	/* Bulk producer into ptr_ring page_pool cache */
	page_pool_ring_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i]))
			break; /* ring full */
	}
	page_pool_ring_unlock(pool);

	/* Hopefully all pages were returned into ptr_ring */
	if (likely(i == bulk_len))
		return;

	/* ptr_ring cache full, free remaining pages outside producer lock
	 * since put_page() with refcnt == 1 can be an expensive operation
	 */
	for (; i < bulk_len; i++)
		page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
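
/* Context note (added for illustration): the bulk API above is what the XDP
 * core uses when freeing many frames at once, e.g. xdp_return_frame_bulk()
 * batches pages belonging to the same pool and then hands the collected
 * array to page_pool_put_page_bulk().
 */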

static struct page *page_pool_drain_frag(struct page_pool *pool,
					 struct page *page)
{
	long drain_count = BIAS_MAX - pool->frag_users;

	/* Some user is still using the page frag */
	if (likely(page_pool_atomic_sub_frag_count_return(page,
							  drain_count)))
		return NULL;

	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page, -1);

		return page;
	}

	page_pool_return_page(pool, page);
	return NULL;
}

static void page_pool_free_frag(struct page_pool *pool)
{
	long drain_count = BIAS_MAX - pool->frag_users;
	struct page *page = pool->frag_page;

	pool->frag_page = NULL;

	if (!page ||
	    page_pool_atomic_sub_frag_count_return(page, drain_count))
		return;

	page_pool_return_page(pool, page);
}

struct page *page_pool_alloc_frag(struct page_pool *pool,
				  unsigned int *offset,
				  unsigned int size, gfp_t gfp)
{
	unsigned int max_size = PAGE_SIZE << pool->p.order;
	struct page *page = pool->frag_page;

	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
		    size > max_size))
		return NULL;

	size = ALIGN(size, dma_get_cache_alignment());
	*offset = pool->frag_offset;

	if (page && *offset + size > max_size) {
		page = page_pool_drain_frag(pool, page);
		if (page)
			goto frag_reset;
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
		if (unlikely(!page)) {
			pool->frag_page = NULL;
			return NULL;
		}

		pool->frag_page = page;

frag_reset:
		pool->frag_users = 1;
		*offset = 0;
		pool->frag_offset = size;
		page_pool_set_frag_count(page, BIAS_MAX);
		return page;
	}

	pool->frag_users++;
	pool->frag_offset = *offset + size;
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
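
/* Illustrative sketch (not from this file): sub-page RX buffers via the frag
 * API. The pool must have been created with PP_FLAG_PAGE_FRAG; "rxq", "buf"
 * and the 2048-byte size are hypothetical.
 *
 *	unsigned int offset;
 *	struct page *page;
 *
 *	page = page_pool_alloc_frag(rxq->page_pool, &offset, 2048,
 *				    GFP_ATOMIC);
 *	if (page)
 *		buf->addr = page_address(page) + offset;
 *
 * Each frag is later released with the page_pool_put_page() family; the
 * pp_frag_count bias ensures only the last user actually recycles the page.
 */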

static void page_pool_empty_ring(struct page_pool *pool)
{
	struct page *page;

	/* Empty recycle ring */
	while ((page = ptr_ring_consume_bh(&pool->ring))) {
		/* Verify the refcnt invariant of cached pages */
		if (!(page_ref_count(page) == 1))
			pr_crit("%s() page_pool refcnt %d violation\n",
				__func__, page_ref_count(page));

		page_pool_return_page(pool, page);
	}
}

static void page_pool_free(struct page_pool *pool)
{
	if (pool->disconnect)
		pool->disconnect(pool);

	ptr_ring_cleanup(&pool->ring, NULL);

	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

	kfree(pool);
}

static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
	struct page *page;

	if (pool->destroy_cnt)
		return;

	/* Empty alloc cache, assume caller made sure this is
	 * no longer in use, and page_pool_alloc_pages() cannot be
	 * called concurrently.
	 */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}

static void page_pool_scrub(struct page_pool *pool)
{
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}

static int page_pool_release(struct page_pool *pool)
{
	int inflight;

	page_pool_scrub(pool);
	inflight = page_pool_inflight(pool);
	if (!inflight)
		page_pool_free(pool);

	return inflight;
}

static void page_pool_release_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

	/* Periodic warning */
	if (time_after_eq(jiffies, pool->defer_warn)) {
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
			__func__, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}

void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
}

void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	if (!page_pool_put(pool))
		return;

	page_pool_free_frag(pool);

	if (!page_pool_release(pool))
		return;

	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
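
/* Shutdown note (added for illustration): after page_pool_destroy() the
 * driver must no longer allocate from the pool. Pages still in-flight in the
 * stack keep the pool structure alive, and page_pool_release_retry() above
 * re-checks every DEFER_TIME (warning every DEFER_WARN_INTERVAL) until the
 * inflight count reaches zero and the pool can actually be freed.
 */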

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
	struct page *page;

	trace_page_pool_update_nid(pool, new_nid);
	pool->p.nid = new_nid;

	/* Flush pool alloc cache, as refill will check NUMA node */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_update_nid);

bool page_pool_return_skb_page(struct page *page)
{
	struct page_pool *pp;

	page = compound_head(page);

	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
	 * in order to preserve any existing bits, such as bit 0 for the
	 * head page of compound page and bit 1 for pfmemalloc page, so
	 * mask those bits for freeing side when doing below checking,
	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
	 * to avoid recycling the pfmemalloc page.
	 */
	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
		return false;

	pp = page->pp;

	/* Driver set this to memory recycling info. Reset it on recycle.
	 * This will *not* work for NIC using a split-page memory model.
	 * The page will be returned to the pool here regardless of the
	 * 'flipped' fragment being in use or not.
	 */
	page_pool_put_full_page(pp, page, false);

	return true;
}
EXPORT_SYMBOL(page_pool_return_skb_page);