1/* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5#ifndef _RTE_MEMORY_H_ 6#define _RTE_MEMORY_H_ 7 8/** 9 * @file 10 * 11 * Memory-related RTE API. 12 */ 13 14#include <stdint.h> 15#include <stddef.h> 16#include <stdio.h> 17 18#ifdef __cplusplus 19extern "C" { 20#endif 21 22#include <rte_bitops.h> 23#include <rte_common.h> 24#include <rte_config.h> 25#include <rte_fbarray.h> 26 27#define RTE_PGSIZE_4K (1ULL << 12) 28#define RTE_PGSIZE_64K (1ULL << 16) 29#define RTE_PGSIZE_256K (1ULL << 18) 30#define RTE_PGSIZE_2M (1ULL << 21) 31#define RTE_PGSIZE_16M (1ULL << 24) 32#define RTE_PGSIZE_256M (1ULL << 28) 33#define RTE_PGSIZE_512M (1ULL << 29) 34#define RTE_PGSIZE_1G (1ULL << 30) 35#define RTE_PGSIZE_4G (1ULL << 32) 36#define RTE_PGSIZE_16G (1ULL << 34) 37 38#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ 39 40/** Prevent this segment from being freed back to the OS. */ 41#define RTE_MEMSEG_FLAG_DO_NOT_FREE RTE_BIT32(0) 42/** This segment is not filled with zeros. */ 43#define RTE_MEMSEG_FLAG_DIRTY RTE_BIT32(1) 44 45/** 46 * Physical memory segment descriptor. 47 */ 48struct rte_memseg { 49 rte_iova_t iova; /**< Start IO address. */ 50 RTE_STD_C11 51 union { 52 void *addr; /**< Start virtual address. */ 53 uint64_t addr_64; /**< Makes sure addr is always 64 bits */ 54 }; 55 size_t len; /**< Length of the segment. */ 56 uint64_t hugepage_sz; /**< The pagesize of underlying memory */ 57 int32_t socket_id; /**< NUMA socket ID. */ 58 uint32_t nchannel; /**< Number of channels. */ 59 uint32_t nrank; /**< Number of ranks. */ 60 uint32_t flags; /**< Memseg-specific flags */ 61} __rte_packed; 62 63/** 64 * memseg list is a special case as we need to store a bunch of other data 65 * together with the array itself. 66 */ 67struct rte_memseg_list { 68 RTE_STD_C11 69 union { 70 void *base_va; 71 /**< Base virtual address for this memseg list. */ 72 uint64_t addr_64; 73 /**< Makes sure addr is always 64-bits */ 74 }; 75 uint64_t page_sz; /**< Page size for all memsegs in this list. */ 76 int socket_id; /**< Socket ID for all memsegs in this list. */ 77 volatile uint32_t version; /**< version number for multiprocess sync. */ 78 size_t len; /**< Length of memory area covered by this memseg list. */ 79 unsigned int external; /**< 1 if this list points to external memory */ 80 unsigned int heap; /**< 1 if this list points to a heap */ 81 struct rte_fbarray memseg_arr; 82}; 83 84/** 85 * Lock page in physical memory and prevent from swapping. 86 * 87 * @param virt 88 * The virtual address. 89 * @return 90 * 0 on success, negative on error. 91 */ 92int rte_mem_lock_page(const void *virt); 93 94/** 95 * Get physical address of any mapped virtual address in the current process. 96 * It is found by browsing the /proc/self/pagemap special file. 97 * The page must be locked. 98 * 99 * @param virt 100 * The virtual address. 101 * @return 102 * The physical address or RTE_BAD_IOVA on error. 103 */ 104phys_addr_t rte_mem_virt2phy(const void *virt); 105 106/** 107 * Get IO virtual address of any mapped virtual address in the current process. 108 * 109 * @note This function will not check internal page table. Instead, in IOVA as 110 * PA mode, it will fall back to getting real physical address (which may 111 * not match the expected IOVA, such as what was specified for external 112 * memory). 113 * 114 * @param virt 115 * The virtual address. 116 * @return 117 * The IO address or RTE_BAD_IOVA on error. 118 */ 119rte_iova_t rte_mem_virt2iova(const void *virt); 120 121/** 122 * Get virtual memory address corresponding to iova address. 123 * 124 * @note This function read-locks the memory hotplug subsystem, and thus cannot 125 * be used within memory-related callback functions. 126 * 127 * @param iova 128 * The iova address. 129 * @return 130 * Virtual address corresponding to iova address (or NULL if address does not 131 * exist within DPDK memory map). 132 */ 133void * 134rte_mem_iova2virt(rte_iova_t iova); 135 136/** 137 * Get memseg to which a particular virtual address belongs. 138 * 139 * @param virt 140 * The virtual address. 141 * @param msl 142 * The memseg list in which to look up based on ``virt`` address 143 * (can be NULL). 144 * @return 145 * Memseg pointer on success, or NULL on error. 146 */ 147struct rte_memseg * 148rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl); 149 150/** 151 * Get memseg list corresponding to virtual memory address. 152 * 153 * @param virt 154 * The virtual address. 155 * @return 156 * Memseg list to which this virtual address belongs to. 157 */ 158struct rte_memseg_list * 159rte_mem_virt2memseg_list(const void *virt); 160 161/** 162 * Memseg walk function prototype. 163 * 164 * Returning 0 will continue walk 165 * Returning 1 will stop the walk 166 * Returning -1 will stop the walk and report error 167 */ 168typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl, 169 const struct rte_memseg *ms, void *arg); 170 171/** 172 * Memseg contig walk function prototype. This will trigger a callback on every 173 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each 174 * callback call will be [``ms->addr``, ``ms->addr + len``). 175 * 176 * Returning 0 will continue walk 177 * Returning 1 will stop the walk 178 * Returning -1 will stop the walk and report error 179 */ 180typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl, 181 const struct rte_memseg *ms, size_t len, void *arg); 182 183/** 184 * Memseg list walk function prototype. This will trigger a callback on every 185 * allocated memseg list. 186 * 187 * Returning 0 will continue walk 188 * Returning 1 will stop the walk 189 * Returning -1 will stop the walk and report error 190 */ 191typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, 192 void *arg); 193 194/** 195 * Walk list of all memsegs. 196 * 197 * @note This function read-locks the memory hotplug subsystem, and thus cannot 198 * be used within memory-related callback functions. 199 * 200 * @note This function will also walk through externally allocated segments. It 201 * is up to the user to decide whether to skip through these segments. 202 * 203 * @param func 204 * Iterator function 205 * @param arg 206 * Argument passed to iterator 207 * @return 208 * 0 if walked over the entire list 209 * 1 if stopped by the user 210 * -1 if user function reported error 211 */ 212int 213rte_memseg_walk(rte_memseg_walk_t func, void *arg); 214 215/** 216 * Walk each VA-contiguous area. 217 * 218 * @note This function read-locks the memory hotplug subsystem, and thus cannot 219 * be used within memory-related callback functions. 220 * 221 * @note This function will also walk through externally allocated segments. It 222 * is up to the user to decide whether to skip through these segments. 223 * 224 * @param func 225 * Iterator function 226 * @param arg 227 * Argument passed to iterator 228 * @return 229 * 0 if walked over the entire list 230 * 1 if stopped by the user 231 * -1 if user function reported error 232 */ 233int 234rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); 235 236/** 237 * Walk each allocated memseg list. 238 * 239 * @note This function read-locks the memory hotplug subsystem, and thus cannot 240 * be used within memory-related callback functions. 241 * 242 * @note This function will also walk through externally allocated segments. It 243 * is up to the user to decide whether to skip through these segments. 244 * 245 * @param func 246 * Iterator function 247 * @param arg 248 * Argument passed to iterator 249 * @return 250 * 0 if walked over the entire list 251 * 1 if stopped by the user 252 * -1 if user function reported error 253 */ 254int 255rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); 256 257/** 258 * Walk list of all memsegs without performing any locking. 259 * 260 * @note This function does not perform any locking, and is only safe to call 261 * from within memory-related callback functions. 262 * 263 * @param func 264 * Iterator function 265 * @param arg 266 * Argument passed to iterator 267 * @return 268 * 0 if walked over the entire list 269 * 1 if stopped by the user 270 * -1 if user function reported error 271 */ 272int 273rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg); 274 275/** 276 * Walk each VA-contiguous area without performing any locking. 277 * 278 * @note This function does not perform any locking, and is only safe to call 279 * from within memory-related callback functions. 280 * 281 * @param func 282 * Iterator function 283 * @param arg 284 * Argument passed to iterator 285 * @return 286 * 0 if walked over the entire list 287 * 1 if stopped by the user 288 * -1 if user function reported error 289 */ 290int 291rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg); 292 293/** 294 * Walk each allocated memseg list without performing any locking. 295 * 296 * @note This function does not perform any locking, and is only safe to call 297 * from within memory-related callback functions. 298 * 299 * @param func 300 * Iterator function 301 * @param arg 302 * Argument passed to iterator 303 * @return 304 * 0 if walked over the entire list 305 * 1 if stopped by the user 306 * -1 if user function reported error 307 */ 308int 309rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg); 310 311/** 312 * Return file descriptor associated with a particular memseg (if available). 313 * 314 * @note This function read-locks the memory hotplug subsystem, and thus cannot 315 * be used within memory-related callback functions. 316 * 317 * @note This returns an internal file descriptor. Performing any operations on 318 * this file descriptor is inherently dangerous, so it should be treated 319 * as read-only for all intents and purposes. 320 * 321 * @param ms 322 * A pointer to memseg for which to get file descriptor. 323 * 324 * @return 325 * Valid file descriptor in case of success. 326 * -1 in case of error, with ``rte_errno`` set to the following values: 327 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg 328 * - ENODEV - ``ms`` fd is not available 329 * - ENOENT - ``ms`` is an unused segment 330 * - ENOTSUP - segment fd's are not supported 331 */ 332int 333rte_memseg_get_fd(const struct rte_memseg *ms); 334 335/** 336 * Return file descriptor associated with a particular memseg (if available). 337 * 338 * @note This function does not perform any locking, and is only safe to call 339 * from within memory-related callback functions. 340 * 341 * @note This returns an internal file descriptor. Performing any operations on 342 * this file descriptor is inherently dangerous, so it should be treated 343 * as read-only for all intents and purposes. 344 * 345 * @param ms 346 * A pointer to memseg for which to get file descriptor. 347 * 348 * @return 349 * Valid file descriptor in case of success. 350 * -1 in case of error, with ``rte_errno`` set to the following values: 351 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg 352 * - ENODEV - ``ms`` fd is not available 353 * - ENOENT - ``ms`` is an unused segment 354 * - ENOTSUP - segment fd's are not supported 355 */ 356int 357rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms); 358 359/** 360 * Get offset into segment file descriptor associated with a particular memseg 361 * (if available). 362 * 363 * @note This function read-locks the memory hotplug subsystem, and thus cannot 364 * be used within memory-related callback functions. 365 * 366 * @param ms 367 * A pointer to memseg for which to get file descriptor. 368 * @param offset 369 * A pointer to offset value where the result will be stored. 370 * 371 * @return 372 * Valid file descriptor in case of success. 373 * -1 in case of error, with ``rte_errno`` set to the following values: 374 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg 375 * - EINVAL - ``offset`` pointer was NULL 376 * - ENODEV - ``ms`` fd is not available 377 * - ENOENT - ``ms`` is an unused segment 378 * - ENOTSUP - segment fd's are not supported 379 */ 380int 381rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset); 382 383/** 384 * Get offset into segment file descriptor associated with a particular memseg 385 * (if available). 386 * 387 * @note This function does not perform any locking, and is only safe to call 388 * from within memory-related callback functions. 389 * 390 * @param ms 391 * A pointer to memseg for which to get file descriptor. 392 * @param offset 393 * A pointer to offset value where the result will be stored. 394 * 395 * @return 396 * Valid file descriptor in case of success. 397 * -1 in case of error, with ``rte_errno`` set to the following values: 398 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg 399 * - EINVAL - ``offset`` pointer was NULL 400 * - ENODEV - ``ms`` fd is not available 401 * - ENOENT - ``ms`` is an unused segment 402 * - ENOTSUP - segment fd's are not supported 403 */ 404int 405rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, 406 size_t *offset); 407 408/** 409 * Register external memory chunk with DPDK. 410 * 411 * @note Using this API is mutually exclusive with ``rte_malloc`` family of 412 * API's. 413 * 414 * @note This API will not perform any DMA mapping. It is expected that user 415 * will do that themselves. 416 * 417 * @note Before accessing this memory in other processes, it needs to be 418 * attached in each of those processes by calling ``rte_extmem_attach`` in 419 * each other process. 420 * 421 * @param va_addr 422 * Start of virtual area to register. Must be aligned by ``page_sz``. 423 * @param len 424 * Length of virtual area to register. Must be aligned by ``page_sz``. 425 * @param iova_addrs 426 * Array of page IOVA addresses corresponding to each page in this memory 427 * area. Can be NULL, in which case page IOVA addresses will be set to 428 * RTE_BAD_IOVA. 429 * @param n_pages 430 * Number of elements in the iova_addrs array. Ignored if ``iova_addrs`` 431 * is NULL. 432 * @param page_sz 433 * Page size of the underlying memory 434 * 435 * @return 436 * - 0 on success 437 * - -1 in case of error, with rte_errno set to one of the following: 438 * EINVAL - one of the parameters was invalid 439 * EEXIST - memory chunk is already registered 440 * ENOSPC - no more space in internal config to store a new memory chunk 441 */ 442int 443rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], 444 unsigned int n_pages, size_t page_sz); 445 446/** 447 * Unregister external memory chunk with DPDK. 448 * 449 * @note Using this API is mutually exclusive with ``rte_malloc`` family of 450 * API's. 451 * 452 * @note This API will not perform any DMA unmapping. It is expected that user 453 * will do that themselves. 454 * 455 * @note Before calling this function, all other processes must call 456 * ``rte_extmem_detach`` to detach from the memory area. 457 * 458 * @param va_addr 459 * Start of virtual area to unregister 460 * @param len 461 * Length of virtual area to unregister 462 * 463 * @return 464 * - 0 on success 465 * - -1 in case of error, with rte_errno set to one of the following: 466 * EINVAL - one of the parameters was invalid 467 * ENOENT - memory chunk was not found 468 */ 469int 470rte_extmem_unregister(void *va_addr, size_t len); 471 472/** 473 * Attach to external memory chunk registered in another process. 474 * 475 * @note Using this API is mutually exclusive with ``rte_malloc`` family of 476 * API's. 477 * 478 * @note This API will not perform any DMA mapping. It is expected that user 479 * will do that themselves. 480 * 481 * @param va_addr 482 * Start of virtual area to register 483 * @param len 484 * Length of virtual area to register 485 * 486 * @return 487 * - 0 on success 488 * - -1 in case of error, with rte_errno set to one of the following: 489 * EINVAL - one of the parameters was invalid 490 * ENOENT - memory chunk was not found 491 */ 492int 493rte_extmem_attach(void *va_addr, size_t len); 494 495/** 496 * Detach from external memory chunk registered in another process. 497 * 498 * @note Using this API is mutually exclusive with ``rte_malloc`` family of 499 * API's. 500 * 501 * @note This API will not perform any DMA unmapping. It is expected that user 502 * will do that themselves. 503 * 504 * @param va_addr 505 * Start of virtual area to unregister 506 * @param len 507 * Length of virtual area to unregister 508 * 509 * @return 510 * - 0 on success 511 * - -1 in case of error, with rte_errno set to one of the following: 512 * EINVAL - one of the parameters was invalid 513 * ENOENT - memory chunk was not found 514 */ 515int 516rte_extmem_detach(void *va_addr, size_t len); 517 518/** 519 * Dump the physical memory layout to a file. 520 * 521 * @note This function read-locks the memory hotplug subsystem, and thus cannot 522 * be used within memory-related callback functions. 523 * 524 * @param f 525 * A pointer to a file for output 526 */ 527void rte_dump_physmem_layout(FILE *f); 528 529/** 530 * Get the total amount of available physical memory. 531 * 532 * @note This function read-locks the memory hotplug subsystem, and thus cannot 533 * be used within memory-related callback functions. 534 * 535 * @return 536 * The total amount of available physical memory in bytes. 537 */ 538uint64_t rte_eal_get_physmem_size(void); 539 540/** 541 * Get the number of memory channels. 542 * 543 * @return 544 * The number of memory channels on the system. The value is 0 if unknown 545 * or not the same on all devices. 546 */ 547unsigned rte_memory_get_nchannel(void); 548 549/** 550 * Get the number of memory ranks. 551 * 552 * @return 553 * The number of memory ranks on the system. The value is 0 if unknown or 554 * not the same on all devices. 555 */ 556unsigned rte_memory_get_nrank(void); 557 558/** 559 * Check if all currently allocated memory segments are compliant with 560 * supplied DMA address width. 561 * 562 * @param maskbits 563 * Address width to check against. 564 */ 565int rte_mem_check_dma_mask(uint8_t maskbits); 566 567/** 568 * Check if all currently allocated memory segments are compliant with 569 * supplied DMA address width. This function will use 570 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk implying 571 * memory_hotplug_lock will not be acquired avoiding deadlock during 572 * memory initialization. 573 * 574 * This function is just for EAL core memory internal use. Drivers should 575 * use the previous rte_mem_check_dma_mask. 576 * 577 * @param maskbits 578 * Address width to check against. 579 */ 580int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits); 581 582/** 583 * Set dma mask to use once memory initialization is done. Previous functions 584 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be 585 * used safely until memory has been initialized. 586 */ 587void rte_mem_set_dma_mask(uint8_t maskbits); 588 589/** 590 * Drivers based on uio will not load unless physical 591 * addresses are obtainable. It is only possible to get 592 * physical addresses when running as a privileged user. 593 * 594 * @return 595 * 1 if the system is able to obtain physical addresses. 596 * 0 if using DMA addresses through an IOMMU. 597 */ 598int rte_eal_using_phys_addrs(void); 599 600 601/** 602 * Enum indicating which kind of memory event has happened. Used by callbacks to 603 * distinguish between memory allocations and deallocations. 604 */ 605enum rte_mem_event { 606 RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */ 607 RTE_MEM_EVENT_FREE, /**< Deallocation event. */ 608}; 609#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64 610/**< maximum length of callback name */ 611 612/** 613 * Function typedef used to register callbacks for memory events. 614 */ 615typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type, 616 const void *addr, size_t len, void *arg); 617 618/** 619 * Function used to register callbacks for memory events. 620 * 621 * @note callbacks will happen while memory hotplug subsystem is write-locked, 622 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a 623 * deadlock when called from within such callbacks. 624 * 625 * @note mem event callbacks not being supported is an expected error condition, 626 * so user code needs to handle this situation. In these cases, return 627 * value will be -1, and rte_errno will be set to ENOTSUP. 628 * 629 * @param name 630 * Name associated with specified callback to be added to the list. 631 * 632 * @param clb 633 * Callback function pointer. 634 * 635 * @param arg 636 * Argument to pass to the callback. 637 * 638 * @return 639 * 0 on successful callback register 640 * -1 on unsuccessful callback register, with rte_errno value indicating 641 * reason for failure. 642 */ 643int 644rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb, 645 void *arg); 646 647/** 648 * Function used to unregister callbacks for memory events. 649 * 650 * @param name 651 * Name associated with specified callback to be removed from the list. 652 * 653 * @param arg 654 * Argument to look for among callbacks with specified callback name. 655 * 656 * @return 657 * 0 on successful callback unregister 658 * -1 on unsuccessful callback unregister, with rte_errno value indicating 659 * reason for failure. 660 */ 661int 662rte_mem_event_callback_unregister(const char *name, void *arg); 663 664 665#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64 666/**< maximum length of alloc validator name */ 667/** 668 * Function typedef used to register memory allocation validation callbacks. 669 * 670 * Returning 0 will allow allocation attempt to continue. Returning -1 will 671 * prevent allocation from succeeding. 672 */ 673typedef int (*rte_mem_alloc_validator_t)(int socket_id, 674 size_t cur_limit, size_t new_len); 675 676/** 677 * @brief Register validator callback for memory allocations. 678 * 679 * Callbacks registered by this function will be called right before memory 680 * allocator is about to trigger allocation of more pages from the system if 681 * said allocation will bring total memory usage above specified limit on 682 * specified socket. User will be able to cancel pending allocation if callback 683 * returns -1. 684 * 685 * @note callbacks will happen while memory hotplug subsystem is write-locked, 686 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a 687 * deadlock when called from within such callbacks. 688 * 689 * @note validator callbacks not being supported is an expected error condition, 690 * so user code needs to handle this situation. In these cases, return 691 * value will be -1, and rte_errno will be set to ENOTSUP. 692 * 693 * @param name 694 * Name associated with specified callback to be added to the list. 695 * 696 * @param clb 697 * Callback function pointer. 698 * 699 * @param socket_id 700 * Socket ID on which to watch for allocations. 701 * 702 * @param limit 703 * Limit above which to trigger callbacks. 704 * 705 * @return 706 * 0 on successful callback register 707 * -1 on unsuccessful callback register, with rte_errno value indicating 708 * reason for failure. 709 */ 710int 711rte_mem_alloc_validator_register(const char *name, 712 rte_mem_alloc_validator_t clb, int socket_id, size_t limit); 713 714/** 715 * @brief Unregister validator callback for memory allocations. 716 * 717 * @param name 718 * Name associated with specified callback to be removed from the list. 719 * 720 * @param socket_id 721 * Socket ID on which to watch for allocations. 722 * 723 * @return 724 * 0 on successful callback unregister 725 * -1 on unsuccessful callback unregister, with rte_errno value indicating 726 * reason for failure. 727 */ 728int 729rte_mem_alloc_validator_unregister(const char *name, int socket_id); 730 731#ifdef __cplusplus 732} 733#endif 734 735#endif /* _RTE_MEMORY_H_ */ 736