1
2
3
4
5#include <errno.h>
6#include <stdio.h>
7#include <stdint.h>
8#include <stdlib.h>
9#include <string.h>
10#include <inttypes.h>
11
12#include <rte_fbarray.h>
13#include <rte_memory.h>
14#include <rte_eal.h>
15#include <rte_eal_memconfig.h>
16#include <rte_eal_paging.h>
17#include <rte_errno.h>
18#include <rte_log.h>
19#ifndef RTE_EXEC_ENV_WINDOWS
20#include <rte_telemetry.h>
21#endif
22
23#include "eal_memalloc.h"
24#include "eal_private.h"
25#include "eal_internal_cfg.h"
26#include "eal_memcfg.h"
27#include "eal_options.h"
28#include "malloc_heap.h"
29
30
31
32
33
34
35
36
37
/*
 * printf-style template for memseg list names: a 64-bit unsigned value
 * followed by "k" and two ints (eal_memseg_list_init() passes page size in
 * kB, socket ID and list index).
 */
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"

/* Next address eal_get_virtual_area() uses as a hint; NULL until first set. */
static void *next_baseaddr;
/* Cached rte_mem_page_size() result; 0 means "not queried yet". */
static uint64_t system_page_sz;

/* Upper bound on retries when a hinted address is not honoured. */
#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
/*
 * Reserve a region of virtual address space whose start is aligned to page_sz.
 *
 * requested_addr: desired start address, or NULL. When NULL and next_baseaddr
 *   is set, next_baseaddr aligned up to page_sz is used as a hint.
 * size: in/out length in bytes. With EAL_VIRTUAL_AREA_ALLOW_SHRINK it is
 *   reduced by page_sz after every failed reservation.
 * page_sz: alignment required for the returned address.
 * flags: combination of EAL_VIRTUAL_AREA_ADDR_IS_HINT,
 *   EAL_VIRTUAL_AREA_ALLOW_SHRINK and EAL_VIRTUAL_AREA_UNMAP.
 * reserve_flags: forwarded unchanged to eal_mem_reserve().
 *
 * Returns the aligned address, or NULL on failure. This function itself sets
 * rte_errno to E2BIG (map size does not fit size_t) or EADDRNOTAVAIL (a
 * mandatory address was not obtained); other failures log the rte_errno
 * value that is current at that point (presumably set by eal_mem_reserve()).
 * With EAL_VIRTUAL_AREA_UNMAP the reservation is released before returning,
 * but the address is still returned.
 */
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
 size_t page_sz, int flags, int reserve_flags)
{
 bool addr_is_hint, allow_shrink, unmap, no_align;
 uint64_t map_sz;
 void *mapped_addr, *aligned_addr;
 uint8_t try = 0;
 struct internal_config *internal_conf =
 eal_get_internal_configuration();

 /* query the system page size once and cache it */
 if (system_page_sz == 0)
 system_page_sz = rte_mem_page_size();

 RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);

 addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
 allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
 unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;

 /* primary process: seed the base address from the configured value */
 if (next_baseaddr == NULL && internal_conf->base_virtaddr != 0 &&
 rte_eal_process_type() == RTE_PROC_PRIMARY)
 next_baseaddr = (void *) internal_conf->base_virtaddr;

#ifdef RTE_ARCH_64
 /* 64-bit primary with no configured base: use eal_get_baseaddr() */
 if (next_baseaddr == NULL && internal_conf->base_virtaddr == 0 &&
 rte_eal_process_type() == RTE_PROC_PRIMARY)
 next_baseaddr = (void *) eal_get_baseaddr();
#endif
 /* no explicit address: fall back to the running base address as a hint */
 if (requested_addr == NULL && next_baseaddr != NULL) {
 requested_addr = next_baseaddr;
 requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
 addr_is_hint = true;
 }

 /*
  * The result needs no extra alignment when either:
  *  - the caller supplied a mandatory (non-hint) address that is already
  *    page_sz-aligned, or
  *  - page_sz equals the system page size.
  * Otherwise one extra page_sz is reserved below so that an aligned
  * sub-range of *size bytes always fits inside the mapping.
  */
 no_align = (requested_addr != NULL &&
 requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
 !addr_is_hint) ||
 page_sz == system_page_sz;

 do {
 map_sz = no_align ? *size : *size + page_sz;
 if (map_sz > SIZE_MAX) {
 RTE_LOG(ERR, EAL, "Map size too big\n");
 rte_errno = E2BIG;
 return NULL;
 }

 mapped_addr = eal_mem_reserve(
 requested_addr, (size_t)map_sz, reserve_flags);
 /* reservation failed: try again with one page less if allowed */
 if ((mapped_addr == NULL) && allow_shrink)
 *size -= page_sz;

 if ((mapped_addr != NULL) && addr_is_hint &&
 (mapped_addr != requested_addr)) {
 /* hint not honoured: move the base address one page forward */
 try++;
 next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
 if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
 /* drop this mapping and retry at the new base address */
 eal_mem_free(mapped_addr, map_sz);
 mapped_addr = NULL;
 requested_addr = next_baseaddr;
 }
 /* past the retry limit the mapping is kept and the loop ends */
 }
 } while ((allow_shrink || addr_is_hint) &&
 (mapped_addr == NULL) && (*size > 0));

 /*
  * Align the result. If the reservation failed, mapped_addr is NULL and
  * the value computed here is never used: the checks below return first.
  */
 aligned_addr = no_align ? mapped_addr :
 RTE_PTR_ALIGN(mapped_addr, page_sz);

 if (*size == 0) {
 RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
 rte_strerror(rte_errno));
 return NULL;
 } else if (mapped_addr == NULL) {
 RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
 rte_strerror(rte_errno));
 return NULL;
 } else if (requested_addr != NULL && !addr_is_hint &&
 aligned_addr != requested_addr) {
 /* a mandatory address was requested and not obtained: give up */
 RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
 requested_addr, aligned_addr);
 eal_mem_free(mapped_addr, map_sz);
 rte_errno = EADDRNOTAVAIL;
 return NULL;
 } else if (requested_addr != NULL && addr_is_hint &&
 aligned_addr != requested_addr) {
 /*
  * The hint was not honoured but the mapping is still usable.
  * Log at WARNING level only when a base virtual address was
  * explicitly configured; otherwise log the same text at DEBUG.
  */
 if (internal_conf->base_virtaddr != 0) {
 RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
 requested_addr, aligned_addr);
 RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
 } else {
 RTE_LOG(DEBUG, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
 requested_addr, aligned_addr);
 RTE_LOG(DEBUG, EAL, " This may cause issues with mapping memory into secondary processes\n");
 }
 } else if (next_baseaddr != NULL) {
 /* success: the next hint starts right after this area */
 next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
 }

 RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
 aligned_addr, *size);

 if (unmap) {
 /* caller only wanted an address: release the whole reservation */
 eal_mem_free(mapped_addr, map_sz);
 } else if (!no_align) {
 void *map_end, *aligned_end;
 size_t before_len, after_len;

 /*
  * The reservation was made one page_sz larger than needed.
  * Release the unused part before the aligned start and the
  * unused part after the aligned end, keeping exactly *size bytes.
  */
 map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
 aligned_end = RTE_PTR_ADD(aligned_addr, *size);

 /* unused head: [mapped_addr, aligned_addr) */
 before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
 if (before_len > 0)
 eal_mem_free(mapped_addr, before_len);

 /* unused tail: [aligned_end, map_end) */
 after_len = RTE_PTR_DIFF(map_end, aligned_end);
 if (after_len > 0)
 eal_mem_free(aligned_end, after_len);
 }

 if (!unmap) {
 /* NOTE(review): presumably excludes the area from core dumps - confirm against eal_mem_set_dump() */
 eal_mem_set_dump(aligned_addr, *size, false);
 }

 return aligned_addr;
}
196
197int
198eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
199 uint64_t page_sz, int n_segs, int socket_id, bool heap)
200{
201 if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
202 sizeof(struct rte_memseg))) {
203 RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
204 rte_strerror(rte_errno));
205 return -1;
206 }
207
208 msl->page_sz = page_sz;
209 msl->socket_id = socket_id;
210 msl->base_va = NULL;
211 msl->heap = heap;
212
213 RTE_LOG(DEBUG, EAL,
214 "Memseg list allocated at socket %i, page size 0x%"PRIx64"kB\n",
215 socket_id, page_sz >> 10);
216
217 return 0;
218}
219
220int
221eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
222 int n_segs, int socket_id, int type_msl_idx, bool heap)
223{
224 char name[RTE_FBARRAY_NAME_LEN];
225
226 snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
227 type_msl_idx);
228
229 return eal_memseg_list_init_named(
230 msl, name, page_sz, n_segs, socket_id, heap);
231}
232
233int
234eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags)
235{
236 size_t page_sz, mem_sz;
237 void *addr;
238
239 page_sz = msl->page_sz;
240 mem_sz = page_sz * msl->memseg_arr.len;
241
242 addr = eal_get_virtual_area(
243 msl->base_va, &mem_sz, page_sz, 0, reserve_flags);
244 if (addr == NULL) {
245#ifndef RTE_EXEC_ENV_WINDOWS
246
247
248
249
250
251 if (rte_errno == EADDRNOTAVAIL)
252 RTE_LOG(ERR, EAL, "Cannot reserve %llu bytes at [%p] - "
253 "please use '--" OPT_BASE_VIRTADDR "' option\n",
254 (unsigned long long)mem_sz, msl->base_va);
255#endif
256 return -1;
257 }
258 msl->base_va = addr;
259 msl->len = mem_sz;
260
261 RTE_LOG(DEBUG, EAL, "VA reserved for memseg list at %p, size %zx\n",
262 addr, mem_sz);
263
264 return 0;
265}
266
267void
268eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs)
269{
270 size_t page_sz = msl->page_sz;
271 int i;
272
273 for (i = 0; i < n_segs; i++) {
274 struct rte_fbarray *arr = &msl->memseg_arr;
275 struct rte_memseg *ms = rte_fbarray_get(arr, i);
276
277 if (rte_eal_iova_mode() == RTE_IOVA_VA)
278 ms->iova = (uintptr_t)addr;
279 else
280 ms->iova = RTE_BAD_IOVA;
281 ms->addr = addr;
282 ms->hugepage_sz = page_sz;
283 ms->socket_id = 0;
284 ms->len = page_sz;
285
286 rte_fbarray_set_used(arr, i);
287
288 addr = RTE_PTR_ADD(addr, page_sz);
289 }
290}
291
292static struct rte_memseg *
293virt2memseg(const void *addr, const struct rte_memseg_list *msl)
294{
295 const struct rte_fbarray *arr;
296 void *start, *end;
297 int ms_idx;
298
299 if (msl == NULL)
300 return NULL;
301
302
303 start = msl->base_va;
304 end = RTE_PTR_ADD(start, msl->len);
305
306 if (addr < start || addr >= end)
307 return NULL;
308
309
310 arr = &msl->memseg_arr;
311 ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
312 return rte_fbarray_get(arr, ms_idx);
313}
314
315static struct rte_memseg_list *
316virt2memseg_list(const void *addr)
317{
318 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
319 struct rte_memseg_list *msl;
320 int msl_idx;
321
322 for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
323 void *start, *end;
324 msl = &mcfg->memsegs[msl_idx];
325
326 start = msl->base_va;
327 end = RTE_PTR_ADD(start, msl->len);
328 if (addr >= start && addr < end)
329 break;
330 }
331
332 if (msl_idx == RTE_MAX_MEMSEG_LISTS)
333 return NULL;
334 return msl;
335}
336
/* Public entry point: return the memseg list containing addr, or NULL. */
struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *addr)
{
	struct rte_memseg_list *owner = virt2memseg_list(addr);

	return owner;
}
342
/* Lookup context passed to find_virt()/find_virt_legacy() by rte_mem_iova2virt(). */
struct virtiova {
 rte_iova_t iova; /* IOVA being searched for */
 void *virt; /* virtual address of the match, written by the walk callback */
};
347static int
348find_virt(const struct rte_memseg_list *msl __rte_unused,
349 const struct rte_memseg *ms, void *arg)
350{
351 struct virtiova *vi = arg;
352 if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
353 size_t offset = vi->iova - ms->iova;
354 vi->virt = RTE_PTR_ADD(ms->addr, offset);
355
356 return 1;
357 }
358 return 0;
359}
360static int
361find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
362 const struct rte_memseg *ms, size_t len, void *arg)
363{
364 struct virtiova *vi = arg;
365 if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
366 size_t offset = vi->iova - ms->iova;
367 vi->virt = RTE_PTR_ADD(ms->addr, offset);
368
369 return 1;
370 }
371 return 0;
372}
373
374void *
375rte_mem_iova2virt(rte_iova_t iova)
376{
377 struct virtiova vi;
378 const struct internal_config *internal_conf =
379 eal_get_internal_configuration();
380
381 memset(&vi, 0, sizeof(vi));
382
383 vi.iova = iova;
384
385
386
387 if (internal_conf->legacy_mem)
388 rte_memseg_contig_walk(find_virt_legacy, &vi);
389 else
390 rte_memseg_walk(find_virt, &vi);
391
392 return vi.virt;
393}
394
/*
 * Return the memseg covering addr. If msl is NULL, the owning list is
 * looked up first. Returns NULL when no segment is found.
 */
struct rte_memseg *
rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	const struct rte_memseg_list *owner = msl;

	if (owner == NULL)
		owner = rte_mem_virt2memseg_list(addr);

	return virt2memseg(addr, owner);
}
401
402static int
403physmem_size(const struct rte_memseg_list *msl, void *arg)
404{
405 uint64_t *total_len = arg;
406
407 if (msl->external)
408 return 0;
409
410 *total_len += msl->memseg_arr.count * msl->page_sz;
411
412 return 0;
413}
414
415
416uint64_t
417rte_eal_get_physmem_size(void)
418{
419 uint64_t total_len = 0;
420
421 rte_memseg_list_walk(physmem_size, &total_len);
422
423 return total_len;
424}
425
426static int
427dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
428 void *arg)
429{
430 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
431 int msl_idx, ms_idx, fd;
432 FILE *f = arg;
433
434 msl_idx = msl - mcfg->memsegs;
435 if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
436 return -1;
437
438 ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
439 if (ms_idx < 0)
440 return -1;
441
442 fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
443 fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
444 "virt:%p, socket_id:%"PRId32", "
445 "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
446 "nrank:%"PRIx32" fd:%i\n",
447 msl_idx, ms_idx,
448 ms->iova,
449 ms->len,
450 ms->addr,
451 ms->socket_id,
452 ms->hugepage_sz,
453 ms->nchannel,
454 ms->nrank,
455 fd);
456
457 return 0;
458}
459
460
461
462
463
464int
465rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
466 void *arg)
467{
468 const struct internal_config *internal_conf =
469 eal_get_internal_configuration();
470
471
472 if (internal_conf->legacy_mem) {
473 RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
474 rte_errno = ENOTSUP;
475 return -1;
476 }
477 return eal_memalloc_mem_event_callback_register(name, clb, arg);
478}
479
480int
481rte_mem_event_callback_unregister(const char *name, void *arg)
482{
483 const struct internal_config *internal_conf =
484 eal_get_internal_configuration();
485
486
487 if (internal_conf->legacy_mem) {
488 RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
489 rte_errno = ENOTSUP;
490 return -1;
491 }
492 return eal_memalloc_mem_event_callback_unregister(name, arg);
493}
494
495int
496rte_mem_alloc_validator_register(const char *name,
497 rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
498{
499 const struct internal_config *internal_conf =
500 eal_get_internal_configuration();
501
502
503 if (internal_conf->legacy_mem) {
504 RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
505 rte_errno = ENOTSUP;
506 return -1;
507 }
508 return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
509 limit);
510}
511
512int
513rte_mem_alloc_validator_unregister(const char *name, int socket_id)
514{
515 const struct internal_config *internal_conf =
516 eal_get_internal_configuration();
517
518
519 if (internal_conf->legacy_mem) {
520 RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
521 rte_errno = ENOTSUP;
522 return -1;
523 }
524 return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
525}
526
527
528void
529rte_dump_physmem_layout(FILE *f)
530{
531 rte_memseg_walk(dump_memseg, f);
532}
533
534static int
535check_iova(const struct rte_memseg_list *msl __rte_unused,
536 const struct rte_memseg *ms, void *arg)
537{
538 uint64_t *mask = arg;
539 rte_iova_t iova;
540
541
542 iova = (ms->iova + ms->len) - 1;
543 if (!(iova & *mask))
544 return 0;
545
546 RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
547 ms->iova, ms->len);
548
549 RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
550 return 1;
551}
552
553#define MAX_DMA_MASK_BITS 63
554
555
556static int
557check_dma_mask(uint8_t maskbits, bool thread_unsafe)
558{
559 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
560 uint64_t mask;
561 int ret;
562
563
564
565 if (maskbits > MAX_DMA_MASK_BITS) {
566 RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
567 maskbits, MAX_DMA_MASK_BITS);
568 return -1;
569 }
570
571
572 mask = ~((1ULL << maskbits) - 1);
573
574 if (thread_unsafe)
575 ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
576 else
577 ret = rte_memseg_walk(check_iova, &mask);
578
579 if (ret)
580
581
582
583
584
585 return 1;
586
587
588
589
590
591 mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
592 RTE_MIN(mcfg->dma_maskbits, maskbits);
593
594 return 0;
595}
596
/* Check maskbits against all memsegs using the locking walker. */
int
rte_mem_check_dma_mask(uint8_t maskbits)
{
	const bool thread_unsafe = false;

	return check_dma_mask(maskbits, thread_unsafe);
}
602
/* Check maskbits against all memsegs using the walker that takes no lock. */
int
rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
{
	const bool thread_unsafe = true;

	return check_dma_mask(maskbits, thread_unsafe);
}
608
609
610
611
612
613
614
615
616void
617rte_mem_set_dma_mask(uint8_t maskbits)
618{
619 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
620
621 mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
622 RTE_MIN(mcfg->dma_maskbits, maskbits);
623}
624
625
626unsigned rte_memory_get_nchannel(void)
627{
628 return rte_eal_get_configuration()->mem_config->nchannel;
629}
630
631
632unsigned rte_memory_get_nrank(void)
633{
634 return rte_eal_get_configuration()->mem_config->nrank;
635}
636
637static int
638rte_eal_memdevice_init(void)
639{
640 struct rte_config *config;
641 const struct internal_config *internal_conf;
642
643 if (rte_eal_process_type() == RTE_PROC_SECONDARY)
644 return 0;
645
646 internal_conf = eal_get_internal_configuration();
647 config = rte_eal_get_configuration();
648 config->mem_config->nchannel = internal_conf->force_nchannel;
649 config->mem_config->nrank = internal_conf->force_nrank;
650
651 return 0;
652}
653
654
/*
 * Lock the system page containing virt: the address is rounded down to a
 * page boundary and one page is passed to rte_mem_lock(), whose result is
 * returned.
 */
int
rte_mem_lock_page(const void *virt)
{
	const uintptr_t raw = (uintptr_t)virt;
	const size_t page_size = rte_mem_page_size();
	const uintptr_t page_start = RTE_PTR_ALIGN_FLOOR(raw, page_size);

	return rte_mem_lock((void *)page_start, page_size);
}
663
664int
665rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
666{
667 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
668 int i, ms_idx, ret = 0;
669
670 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
671 struct rte_memseg_list *msl = &mcfg->memsegs[i];
672 const struct rte_memseg *ms;
673 struct rte_fbarray *arr;
674
675 if (msl->memseg_arr.count == 0)
676 continue;
677
678 arr = &msl->memseg_arr;
679
680 ms_idx = rte_fbarray_find_next_used(arr, 0);
681 while (ms_idx >= 0) {
682 int n_segs;
683 size_t len;
684
685 ms = rte_fbarray_get(arr, ms_idx);
686
687
688
689
690 n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
691 len = n_segs * msl->page_sz;
692
693 ret = func(msl, ms, len, arg);
694 if (ret)
695 return ret;
696 ms_idx = rte_fbarray_find_next_used(arr,
697 ms_idx + n_segs);
698 }
699 }
700 return 0;
701}
702
703int
704rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
705{
706 int ret = 0;
707
708
709 rte_mcfg_mem_read_lock();
710 ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
711 rte_mcfg_mem_read_unlock();
712
713 return ret;
714}
715
716int
717rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
718{
719 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
720 int i, ms_idx, ret = 0;
721
722 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
723 struct rte_memseg_list *msl = &mcfg->memsegs[i];
724 const struct rte_memseg *ms;
725 struct rte_fbarray *arr;
726
727 if (msl->memseg_arr.count == 0)
728 continue;
729
730 arr = &msl->memseg_arr;
731
732 ms_idx = rte_fbarray_find_next_used(arr, 0);
733 while (ms_idx >= 0) {
734 ms = rte_fbarray_get(arr, ms_idx);
735 ret = func(msl, ms, arg);
736 if (ret)
737 return ret;
738 ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
739 }
740 }
741 return 0;
742}
743
744int
745rte_memseg_walk(rte_memseg_walk_t func, void *arg)
746{
747 int ret = 0;
748
749
750 rte_mcfg_mem_read_lock();
751 ret = rte_memseg_walk_thread_unsafe(func, arg);
752 rte_mcfg_mem_read_unlock();
753
754 return ret;
755}
756
757int
758rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
759{
760 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
761 int i, ret = 0;
762
763 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
764 struct rte_memseg_list *msl = &mcfg->memsegs[i];
765
766 if (msl->base_va == NULL)
767 continue;
768
769 ret = func(msl, arg);
770 if (ret)
771 return ret;
772 }
773 return 0;
774}
775
776int
777rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
778{
779 int ret = 0;
780
781
782 rte_mcfg_mem_read_lock();
783 ret = rte_memseg_list_walk_thread_unsafe(func, arg);
784 rte_mcfg_mem_read_unlock();
785
786 return ret;
787}
788
789int
790rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
791{
792 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
793 struct rte_memseg_list *msl;
794 struct rte_fbarray *arr;
795 int msl_idx, seg_idx, ret;
796
797 if (ms == NULL) {
798 rte_errno = EINVAL;
799 return -1;
800 }
801
802 msl = rte_mem_virt2memseg_list(ms->addr);
803 if (msl == NULL) {
804 rte_errno = EINVAL;
805 return -1;
806 }
807 arr = &msl->memseg_arr;
808
809 msl_idx = msl - mcfg->memsegs;
810 seg_idx = rte_fbarray_find_idx(arr, ms);
811
812 if (!rte_fbarray_is_used(arr, seg_idx)) {
813 rte_errno = ENOENT;
814 return -1;
815 }
816
817
818 if (msl->external) {
819 rte_errno = ENOTSUP;
820 return -1;
821 }
822
823 ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
824 if (ret < 0) {
825 rte_errno = -ret;
826 ret = -1;
827 }
828 return ret;
829}
830
/* Look up a memseg's file descriptor while holding the memory read lock. */
int
rte_memseg_get_fd(const struct rte_memseg *ms)
{
	int fd;

	rte_mcfg_mem_read_lock();
	fd = rte_memseg_get_fd_thread_unsafe(ms);
	rte_mcfg_mem_read_unlock();

	return fd;
}
842
843int
844rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
845 size_t *offset)
846{
847 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
848 struct rte_memseg_list *msl;
849 struct rte_fbarray *arr;
850 int msl_idx, seg_idx, ret;
851
852 if (ms == NULL || offset == NULL) {
853 rte_errno = EINVAL;
854 return -1;
855 }
856
857 msl = rte_mem_virt2memseg_list(ms->addr);
858 if (msl == NULL) {
859 rte_errno = EINVAL;
860 return -1;
861 }
862 arr = &msl->memseg_arr;
863
864 msl_idx = msl - mcfg->memsegs;
865 seg_idx = rte_fbarray_find_idx(arr, ms);
866
867 if (!rte_fbarray_is_used(arr, seg_idx)) {
868 rte_errno = ENOENT;
869 return -1;
870 }
871
872
873 if (msl->external) {
874 rte_errno = ENOTSUP;
875 return -1;
876 }
877
878 ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
879 if (ret < 0) {
880 rte_errno = -ret;
881 ret = -1;
882 }
883 return ret;
884}
885
/* Look up a memseg's fd offset while holding the memory read lock. */
int
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
{
	int rc;

	rte_mcfg_mem_read_lock();
	rc = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
	rte_mcfg_mem_read_unlock();

	return rc;
}
897
898int
899rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
900 unsigned int n_pages, size_t page_sz)
901{
902 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
903 unsigned int socket_id, n;
904 int ret = 0;
905
906 if (va_addr == NULL || page_sz == 0 || len == 0 ||
907 !rte_is_power_of_2(page_sz) ||
908 RTE_ALIGN(len, page_sz) != len ||
909 ((len / page_sz) != n_pages && iova_addrs != NULL) ||
910 !rte_is_aligned(va_addr, page_sz)) {
911 rte_errno = EINVAL;
912 return -1;
913 }
914 rte_mcfg_mem_write_lock();
915
916
917 if (malloc_heap_find_external_seg(va_addr, len) != NULL) {
918 rte_errno = EEXIST;
919 ret = -1;
920 goto unlock;
921 }
922
923
924 socket_id = mcfg->next_socket_id;
925 if (socket_id > INT32_MAX) {
926 RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
927 rte_errno = ENOSPC;
928 ret = -1;
929 goto unlock;
930 }
931
932
933 n = len / page_sz;
934 if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
935 page_sz, "extmem", socket_id) == NULL) {
936 ret = -1;
937 goto unlock;
938 }
939
940
941 mcfg->next_socket_id++;
942unlock:
943 rte_mcfg_mem_write_unlock();
944 return ret;
945}
946
947int
948rte_extmem_unregister(void *va_addr, size_t len)
949{
950 struct rte_memseg_list *msl;
951 int ret = 0;
952
953 if (va_addr == NULL || len == 0) {
954 rte_errno = EINVAL;
955 return -1;
956 }
957 rte_mcfg_mem_write_lock();
958
959
960 msl = malloc_heap_find_external_seg(va_addr, len);
961 if (msl == NULL) {
962 rte_errno = ENOENT;
963 ret = -1;
964 goto unlock;
965 }
966
967 ret = malloc_heap_destroy_external_seg(msl);
968unlock:
969 rte_mcfg_mem_write_unlock();
970 return ret;
971}
972
973static int
974sync_memory(void *va_addr, size_t len, bool attach)
975{
976 struct rte_memseg_list *msl;
977 int ret = 0;
978
979 if (va_addr == NULL || len == 0) {
980 rte_errno = EINVAL;
981 return -1;
982 }
983 rte_mcfg_mem_write_lock();
984
985
986 msl = malloc_heap_find_external_seg(va_addr, len);
987 if (msl == NULL) {
988 rte_errno = ENOENT;
989 ret = -1;
990 goto unlock;
991 }
992 if (attach)
993 ret = rte_fbarray_attach(&msl->memseg_arr);
994 else
995 ret = rte_fbarray_detach(&msl->memseg_arr);
996
997unlock:
998 rte_mcfg_mem_write_unlock();
999 return ret;
1000}
1001
/* Attach this process to a registered external memory area. */
int
rte_extmem_attach(void *va_addr, size_t len)
{
	const bool attach = true;

	return sync_memory(va_addr, len, attach);
}
1007
/* Detach this process from a registered external memory area. */
int
rte_extmem_detach(void *va_addr, size_t len)
{
	const bool attach = false;

	return sync_memory(va_addr, len, attach);
}
1013
1014
1015int
1016rte_eal_memory_detach(void)
1017{
1018 const struct internal_config *internal_conf =
1019 eal_get_internal_configuration();
1020 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1021 size_t page_sz = rte_mem_page_size();
1022 unsigned int i;
1023
1024 if (internal_conf->in_memory == 1)
1025 return 0;
1026
1027 rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
1028
1029
1030 if (eal_memalloc_cleanup())
1031 RTE_LOG(ERR, EAL, "Could not release memory subsystem data\n");
1032
1033 for (i = 0; i < RTE_DIM(mcfg->memsegs); i++) {
1034 struct rte_memseg_list *msl = &mcfg->memsegs[i];
1035
1036
1037 if (msl->base_va == NULL)
1038 continue;
1039
1040
1041
1042
1043
1044
1045
1046 if (!msl->external)
1047 if (rte_mem_unmap(msl->base_va, msl->len) != 0)
1048 RTE_LOG(ERR, EAL, "Could not unmap memory: %s\n",
1049 rte_strerror(rte_errno));
1050
1051
1052
1053
1054
1055
1056 if (rte_fbarray_detach(&msl->memseg_arr))
1057 RTE_LOG(ERR, EAL, "Could not detach fbarray: %s\n",
1058 rte_strerror(rte_errno));
1059 }
1060 rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
1061
1062
1063
1064
1065
1066
1067 if (internal_conf->no_shconf == 0 && mcfg->mem_cfg_addr != 0) {
1068 if (rte_mem_unmap(mcfg, RTE_ALIGN(sizeof(*mcfg), page_sz)) != 0)
1069 RTE_LOG(ERR, EAL, "Could not unmap shared memory config: %s\n",
1070 rte_strerror(rte_errno));
1071 }
1072 rte_eal_get_configuration()->mem_config = NULL;
1073
1074 return 0;
1075}
1076
1077
1078int
1079rte_eal_memory_init(void)
1080{
1081 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1082 const struct internal_config *internal_conf =
1083 eal_get_internal_configuration();
1084
1085 int retval;
1086 RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
1087
1088 if (!mcfg)
1089 return -1;
1090
1091
1092 rte_mcfg_mem_read_lock();
1093
1094 if (rte_eal_memseg_init() < 0)
1095 goto fail;
1096
1097 if (eal_memalloc_init() < 0)
1098 goto fail;
1099
1100 retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
1101 rte_eal_hugepage_init() :
1102 rte_eal_hugepage_attach();
1103 if (retval < 0)
1104 goto fail;
1105
1106 if (internal_conf->no_shconf == 0 && rte_eal_memdevice_init() < 0)
1107 goto fail;
1108
1109 return 0;
1110fail:
1111 rte_mcfg_mem_read_unlock();
1112 return -1;
1113}
1114
1115#ifndef RTE_EXEC_ENV_WINDOWS
/* Telemetry command paths registered by memory_telemetry(). */
#define EAL_MEMZONE_LIST_REQ "/eal/memzone_list"
#define EAL_MEMZONE_INFO_REQ "/eal/memzone_info"
#define EAL_HEAP_LIST_REQ "/eal/heap_list"
#define EAL_HEAP_INFO_REQ "/eal/heap_info"
/* Size of the buffer that receives "%p"-formatted addresses in handle_eal_memzone_info_request(). */
#define ADDR_STR 15
1121
1122
1123static int
1124handle_eal_heap_info_request(const char *cmd __rte_unused, const char *params,
1125 struct rte_tel_data *d)
1126{
1127 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1128 struct rte_malloc_socket_stats sock_stats;
1129 struct malloc_heap *heap;
1130 unsigned int heap_id;
1131
1132 if (params == NULL || strlen(params) == 0)
1133 return -1;
1134
1135 heap_id = (unsigned int)strtoul(params, NULL, 10);
1136
1137
1138 heap = &mcfg->malloc_heaps[heap_id];
1139 malloc_heap_get_stats(heap, &sock_stats);
1140
1141 rte_tel_data_start_dict(d);
1142 rte_tel_data_add_dict_int(d, "Head id", heap_id);
1143 rte_tel_data_add_dict_string(d, "Name", heap->name);
1144 rte_tel_data_add_dict_u64(d, "Heap_size",
1145 sock_stats.heap_totalsz_bytes);
1146 rte_tel_data_add_dict_u64(d, "Free_size", sock_stats.heap_freesz_bytes);
1147 rte_tel_data_add_dict_u64(d, "Alloc_size",
1148 sock_stats.heap_allocsz_bytes);
1149 rte_tel_data_add_dict_u64(d, "Greatest_free_size",
1150 sock_stats.greatest_free_size);
1151 rte_tel_data_add_dict_u64(d, "Alloc_count", sock_stats.alloc_count);
1152 rte_tel_data_add_dict_u64(d, "Free_count", sock_stats.free_count);
1153
1154 return 0;
1155}
1156
1157
1158static int
1159handle_eal_heap_list_request(const char *cmd __rte_unused,
1160 const char *params __rte_unused,
1161 struct rte_tel_data *d)
1162{
1163 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1164 struct rte_malloc_socket_stats sock_stats;
1165 unsigned int heap_id;
1166
1167 rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1168
1169 for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
1170 struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
1171
1172 malloc_heap_get_stats(heap, &sock_stats);
1173 if (sock_stats.heap_totalsz_bytes != 0)
1174 rte_tel_data_add_array_int(d, heap_id);
1175 }
1176
1177 return 0;
1178}
1179
1180
1181static int
1182handle_eal_memzone_info_request(const char *cmd __rte_unused,
1183 const char *params, struct rte_tel_data *d)
1184{
1185 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1186 struct rte_memseg_list *msl = NULL;
1187 int ms_idx, ms_count = 0;
1188 void *cur_addr, *mz_end;
1189 struct rte_memzone *mz;
1190 struct rte_memseg *ms;
1191 char addr[ADDR_STR];
1192 unsigned int mz_idx;
1193 size_t page_sz;
1194
1195 if (params == NULL || strlen(params) == 0)
1196 return -1;
1197
1198 mz_idx = strtoul(params, NULL, 10);
1199
1200
1201 mz = rte_fbarray_get(&mcfg->memzones, mz_idx);
1202
1203 rte_tel_data_start_dict(d);
1204 rte_tel_data_add_dict_int(d, "Zone", mz_idx);
1205 rte_tel_data_add_dict_string(d, "Name", mz->name);
1206 rte_tel_data_add_dict_int(d, "Length", mz->len);
1207 snprintf(addr, ADDR_STR, "%p", mz->addr);
1208 rte_tel_data_add_dict_string(d, "Address", addr);
1209 rte_tel_data_add_dict_int(d, "Socket", mz->socket_id);
1210 rte_tel_data_add_dict_int(d, "Flags", mz->flags);
1211
1212
1213 msl = rte_mem_virt2memseg_list(mz->addr);
1214 if (!msl) {
1215 RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
1216 return -1;
1217 }
1218 page_sz = (size_t)mz->hugepage_sz;
1219 cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
1220 mz_end = RTE_PTR_ADD(cur_addr, mz->len);
1221
1222 ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
1223 ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
1224
1225 rte_tel_data_add_dict_int(d, "Hugepage_size", page_sz);
1226 snprintf(addr, ADDR_STR, "%p", ms->addr);
1227 rte_tel_data_add_dict_string(d, "Hugepage_base", addr);
1228
1229 do {
1230
1231 cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
1232
1233
1234 ++ms;
1235 ms_count++;
1236 } while (cur_addr < mz_end);
1237
1238 rte_tel_data_add_dict_int(d, "Hugepage_used", ms_count);
1239
1240 return 0;
1241}
1242
1243static void
1244memzone_list_cb(const struct rte_memzone *mz __rte_unused,
1245 void *arg __rte_unused)
1246{
1247 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1248 struct rte_tel_data *d = arg;
1249 int mz_idx;
1250
1251 mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
1252 rte_tel_data_add_array_int(d, mz_idx);
1253}
1254
1255
1256
1257static int
1258handle_eal_memzone_list_request(const char *cmd __rte_unused,
1259 const char *params __rte_unused,
1260 struct rte_tel_data *d)
1261{
1262 rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1263 rte_memzone_walk(memzone_list_cb, d);
1264
1265 return 0;
1266}
1267
1268RTE_INIT(memory_telemetry)
1269{
1270 rte_telemetry_register_cmd(
1271 EAL_MEMZONE_LIST_REQ, handle_eal_memzone_list_request,
1272 "List of memzone index reserved. Takes no parameters");
1273 rte_telemetry_register_cmd(
1274 EAL_MEMZONE_INFO_REQ, handle_eal_memzone_info_request,
1275 "Returns memzone info. Parameters: int mz_id");
1276 rte_telemetry_register_cmd(
1277 EAL_HEAP_LIST_REQ, handle_eal_heap_list_request,
1278 "List of heap index setup. Takes no parameters");
1279 rte_telemetry_register_cmd(
1280 EAL_HEAP_INFO_REQ, handle_eal_heap_info_request,
1281 "Returns malloc heap stats. Parameters: int heap_id");
1282}
1283#endif
1284