/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/queue.h>

#include <rte_dev.h>
#include <rte_lcore.h>
#include <rte_memory.h>

#include "eal_internal_cfg.h"

/**
 * Structure storing internal configuration (per-lcore)
 */
struct lcore_config {
	pthread_t thread_id;       /**< pthread identifier */
	int pipe_main2worker[2];   /**< communication pipe with main */
	int pipe_worker2main[2];   /**< communication pipe with main */

	lcore_function_t * volatile f; /**< function to call */
	void * volatile arg;       /**< argument of function */
	volatile int ret;          /**< return value of function */

	volatile enum rte_lcore_state_t state; /**< lcore state */
	unsigned int socket_id;    /**< physical socket id for this lcore */
	unsigned int core_id;      /**< core number on socket for this lcore */
	int core_index;            /**< relative index, starting from 0 */
	uint8_t core_role;         /**< role of core eg: OFF, RTE, SERVICE */

	rte_cpuset_t cpuset;       /**< cpu set which the lcore affinity to */
};

extern struct lcore_config lcore_config[RTE_MAX_LCORE];

/**
 * The global RTE configuration structure.
 */
struct rte_config {
	uint32_t main_lcore;         /**< Id of the main lcore */
	uint32_t lcore_count;        /**< Number of available logical cores. */
	uint32_t numa_node_count;    /**< Number of detected NUMA nodes. */
	uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
	uint32_t service_lcore_count;/**< Number of available service cores. */
	enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */

	/** Primary or secondary configuration */
	enum rte_proc_type_t process_type;

	/** PA or VA mapping mode */
	enum rte_iova_mode iova_mode;

	/**
	 * Pointer to memory configuration, which may be shared across multiple
	 * DPDK instances
	 */
	struct rte_mem_config *mem_config;
} __rte_packed;

/**
 * Get the global configuration structure.
 *
 * @return
 *   A pointer to the global configuration structure.
 */
struct rte_config *rte_eal_get_configuration(void);

/**
 * Initialize the memzone subsystem (private to eal).
 *
 * @return
 *   - 0 on success
 *   - Negative on error
 */
int rte_eal_memzone_init(void);

/**
 * Fill configuration with number of physical and logical processors
 *
 * This function is private to EAL.
 *
 * Parse /proc/cpuinfo to get the number of physical and logical
 * processors on the machine.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_cpu_init(void);

/**
 * Create memseg lists
 *
 * This function is private to EAL.
 *
 * Preallocate virtual memory.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_memseg_init(void);

/**
 * Map memory
 *
 * This function is private to EAL.
 *
 * Fill configuration structure with this information, and return 0 on success.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_memory_init(void);

/**
 * Configure timers
 *
 * This function is private to EAL.
 *
 * Mmap memory areas used by HPET (high precision event timer) that will
 * provide our time reference, and configure the TSC frequency also for it
 * to be used as a reference.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_timer_init(void);

/**
 * Init tail queues for non-EAL library structures. This is to allow
 * the rings, mempools, etc. lists to be shared among multiple processes
 *
 * This function is private to EAL
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_tailqs_init(void);

/**
 * Init interrupt handling.
 *
 * This function is private to EAL.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_intr_init(void);

/**
 * Close the default log stream
 *
 * This function is private to EAL.
 */
void rte_eal_log_cleanup(void);

/**
 * Init alarm mechanism. This is to allow a callback be called after
 * specific time.
 *
 * This function is private to EAL.
 *
 * @return
 *   0 on success, negative on error
 */
int rte_eal_alarm_init(void);

/**
 * Alarm mechanism cleanup.
 *
 * This function is private to EAL.
 */
void rte_eal_alarm_cleanup(void);

/**
 * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
 * etc.) loaded.
 *
 * @param module_name
 *	The module's name which need to be checked
 *
 * @return
 *	-1 means some error happens(NULL pointer or open failure)
 *	0 means the module not loaded
 *	1 means the module loaded
 */
int rte_eal_check_module(const char *module_name);

/**
 * Memory reservation flags.
 */
enum eal_mem_reserve_flags {
	/**
	 * Reserve hugepages. May be unsupported by some platforms.
	 */
	EAL_RESERVE_HUGEPAGES = 1 << 0,
	/**
	 * Force reserving memory at the requested address.
	 * This can be a destructive action depending on the implementation.
	 *
	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
	 *      (although implementations are not required to use it).
	 */
	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
};

/**
 * Get virtual area of specified size from the OS.
 *
 * This function is private to the EAL.
 *
 * @param requested_addr
 *   Address where to request address space.
 * @param size
 *   Size of requested area.
 * @param page_sz
 *   Page size on which to align requested virtual area.
 * @param flags
 *   EAL_VIRTUAL_AREA_* flags.
 * @param reserve_flags
 *   Extra flags passed directly to eal_mem_reserve().
 *
 * @return
 *   Virtual area address if successful.
 *   NULL if unsuccessful.
 */

#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
/**< don't fail if cannot get exact requested address. */
#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
/**< try getting smaller sized (decrement by page size) virtual areas if cannot
 * get area of requested size.
 */
#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
/**< immediately unmap reserved virtual area. */
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int reserve_flags);

/**
 * Initialize a memory segment list and create its backing storage.
 *
 * @param msl
 *  Memory segment list to be filled.
 * @param name
 *  Name for the backing storage.
 * @param page_sz
 *  Size of segment pages in the MSL.
 * @param n_segs
 *  Number of segments.
 * @param socket_id
 *  Socket ID. Must not be SOCKET_ID_ANY.
 * @param heap
 *  Mark MSL as pointing to a heap.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
		uint64_t page_sz, int n_segs, int socket_id, bool heap);

/**
 * Initialize memory segment list and create its backing storage
 * with a name corresponding to MSL parameters.
 *
 * @param type_msl_idx
 *  Index of the MSL among other MSLs of the same socket and page size.
 *
 * @see eal_memseg_list_init_named for remaining parameters description.
 */
int
eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
		int n_segs, int socket_id, int type_msl_idx, bool heap);

/**
 * Reserve VA space for a memory segment list
 * previously initialized with eal_memseg_list_init().
 *
 * @param msl
 *  Initialized memory segment list with page size defined.
 * @param reserve_flags
 *  Extra memory reservation flags. Can be 0 if unnecessary.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags);

/**
 * Populate MSL, each segment is one page long.
 *
 * @param msl
 *  Initialized memory segment list with page size defined.
 * @param addr
 *  Starting address of list segments.
 * @param n_segs
 *  Number of segments to populate.
 */
void
eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs);

/**
 * Distribute available memory between MSLs.
 *
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_memseg_lists_init(void);

/**
 * Preallocate hugepages for dynamic allocation.
 *
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_hugepage_init(void);

/**
 * Given the list of hugepage sizes and the number of pages thereof,
 * calculate the best number of pages of each size to fulfill the request
 * for RAM on each NUMA node.
 *
 * @param memory
 *  Amounts of memory requested for each NUMA node of RTE_MAX_NUMA_NODES.
 * @param hp_info
 *  Information about hugepages of different size.
 * @param hp_used
 *  Receives information about used hugepages of each size.
 * @param num_hp_info
 *  Number of elements in hp_info and hp_used.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_dynmem_calc_num_pages_per_socket(
	uint64_t *memory, struct hugepage_info *hp_info,
	struct hugepage_info *hp_used, unsigned int num_hp_info);

/**
 * Get cpu core_id.
 *
 * This function is private to the EAL.
 */
unsigned eal_cpu_core_id(unsigned lcore_id);

/**
 * Check if cpu is present.
 *
 * This function is private to the EAL.
 */
int eal_cpu_detected(unsigned lcore_id);

/**
 * Set TSC frequency from precise value or estimation
 *
 * This function is private to the EAL.
 */
void set_tsc_freq(void);

/**
 * Get precise TSC frequency from system
 *
 * This function is private to the EAL.
 */
uint64_t get_tsc_freq(void);

/**
 * Get TSC frequency if the architecture supports.
 *
 * This function is private to the EAL.
 *
 * @return
 *   The number of TSC cycles in one second.
 *   Returns zero if the architecture support is not available.
 */
uint64_t get_tsc_freq_arch(void);

/**
 * Allocate a free lcore to associate to a non-EAL thread.
 *
 * @return
 *   - the id of a lcore with role ROLE_NON_EAL on success.
 *   - RTE_MAX_LCORE if none was available or initializing was refused (see
 *     rte_lcore_callback_register).
 */
unsigned int eal_lcore_non_eal_allocate(void);

/**
 * Release the lcore used by a non-EAL thread.
 * Counterpart of eal_lcore_non_eal_allocate().
 *
 * @param lcore_id
 *   The lcore with role ROLE_NON_EAL to release.
 */
void eal_lcore_non_eal_release(unsigned int lcore_id);

/**
 * Prepare physical memory mapping
 * i.e. hugepages on Linux and
 *      contigmem on BSD.
 *
 * This function is private to the EAL.
 */
int rte_eal_hugepage_init(void);

/**
 * Creates memory mapping in secondary process
 * i.e. hugepages on Linux and
 *      contigmem on BSD.
 *
 * This function is private to the EAL.
 */
int rte_eal_hugepage_attach(void);

/**
 * Detaches all memory mappings from a process.
 *
 * This function is private to the EAL.
 */
int rte_eal_memory_detach(void);

/**
 * Find a bus capable of identifying a device.
 *
 * @param str
 *   A device identifier (PCI address, virtual PMD name, ...).
 *
 * @return
 *   A valid bus handle if found.
 *   NULL if no bus is able to parse this device.
 */
struct rte_bus *rte_bus_find_by_device_name(const char *str);

/**
 * Create the unix channel for primary/secondary communication.
 *
 * @return
 *   0 on success;
 *   (<0) on failure.
 */
int rte_mp_channel_init(void);

/**
 * Primary/secondary communication cleanup.
 */
void rte_mp_channel_cleanup(void);

/**
 * @internal
 * Parse a device string and store its information in an
 * rte_devargs structure.
 *
 * A device description is split by layers of abstraction of the device:
 * bus, class and driver. Each layer will offer a set of properties that
 * can be applied either to configure or recognize a device.
 *
 * This function will parse those properties and prepare the rte_devargs
 * to be given to each layers for processing.
 *
 * Note: if the "data" field of the devargs points to devstr,
 * then no dynamic allocation is performed and the rte_devargs
 * can be safely discarded.
 *
 * Otherwise ``data`` will hold a workable copy of devstr, that will be
 * used by layers descriptors within rte_devargs. In this case,
 * any rte_devargs should be cleaned-up before being freed.
 *
 * @param devargs
 *   rte_devargs structure to fill.
 *
 * @param devstr
 *   Device string.
 *
 * @return
 *   0 on success.
 *   Negative errno values on error (rte_errno is set).
 */
int
rte_devargs_layers_parse(struct rte_devargs *devargs,
			 const char *devstr);

/**
 * Probe a device at local process.
 *
 * @param devargs
 *   Device arguments including bus, class and driver properties.
 * @param new_dev
 *   new device be probed as output.
 * @return
 *   0 on success, negative on error.
 */
int local_dev_probe(const char *devargs, struct rte_device **new_dev);

/**
 * Hotplug remove a given device from a specific bus at local process.
 *
 * @param dev
 *   Data structure of the device to remove.
 * @return
 *   0 on success, negative on error.
 */
int local_dev_remove(struct rte_device *dev);

/**
 * Iterate over all buses to find the corresponding bus to handle the sigbus
 * error.
 * @param failure_addr
 *	Pointer of the fault address of the sigbus error.
 *
 * @return
 *	 0 success to handle the sigbus.
 *	-1 failed to handle the sigbus
 *	 1 no bus can handle the sigbus
 */
int rte_bus_sigbus_handler(const void *failure_addr);

/**
 * @internal
 * Register the sigbus handler.
 *
 * @return
 *   - On success, zero.
 *   - On failure, a negative value.
 */
int
dev_sigbus_handler_register(void);

/**
 * @internal
 * Unregister the sigbus handler.
 *
 * @return
 *   - On success, zero.
 *   - On failure, a negative value.
 */
int
dev_sigbus_handler_unregister(void);

/**
 * Get OS-specific EAL mapping base address.
 */
uint64_t
eal_get_baseaddr(void);

/* Allocation helper bypassing trace instrumentation (inferred from name;
 * confirm against the malloc implementation).
 */
void *
eal_malloc_no_trace(const char *type, size_t size, unsigned int align);

/* Free counterpart of eal_malloc_no_trace(). */
void eal_free_no_trace(void *addr);

/** Options for eal_file_open(). */
enum eal_open_flags {
	/** Open file for reading. */
	EAL_OPEN_READONLY = 0x00,
	/** Open file for reading and writing. */
	EAL_OPEN_READWRITE = 0x02,
	/**
	 * Create the file if it doesn't exist.
	 * New files are only accessible to the owner (0600 equivalent).
	 */
	EAL_OPEN_CREATE = 0x04
};

/**
 * Open or create a file.
 *
 * @param path
 *  Path to the file.
 * @param flags
 *  A combination of eal_open_flags controlling operation and FD behavior.
 * @return
 *  Open file descriptor on success, (-1) on failure and rte_errno is set.
 */
int
eal_file_open(const char *path, int flags);

/** File locking operation. */
enum eal_flock_op {
	EAL_FLOCK_SHARED,    /**< Acquire a shared lock. */
	EAL_FLOCK_EXCLUSIVE, /**< Acquire an exclusive lock. */
	EAL_FLOCK_UNLOCK     /**< Release a previously taken lock. */
};

/** Behavior on file locking conflict. */
enum eal_flock_mode {
	EAL_FLOCK_WAIT,  /**< Wait until the file gets unlocked to lock it. */
	EAL_FLOCK_RETURN /**< Return immediately if the file is locked. */
};

/**
 * Lock or unlock the file.
 *
 * On failure @code rte_errno @endcode is set to the error code
 * specified by POSIX flock(3) description.
 *
 * @param fd
 *  Opened file descriptor.
 * @param op
 *  Operation to perform.
 * @param mode
 *  Behavior on conflict.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_file_lock(int fd, enum eal_flock_op op, enum eal_flock_mode mode);

/**
 * Truncate or extend the file to the specified size.
 *
 * On failure @code rte_errno @endcode is set to the error code
 * specified by POSIX ftruncate(3) description.
 *
 * @param fd
 *  Opened file descriptor.
 * @param size
 *  Desired file size.
 * @return
 *  0 on success, (-1) on failure.
 */
int
eal_file_truncate(int fd, ssize_t size);

/**
 * Reserve a region of virtual memory.
 *
 * Use eal_mem_free() to free reserved memory.
 *
 * @param requested_addr
 *  A desired reservation address which must be page-aligned.
 *  The system might not respect it.
 *  NULL means the address will be chosen by the system.
 * @param size
 *  Reservation size. Must be a multiple of system page size.
 * @param flags
 *  Reservation options, a combination of eal_mem_reserve_flags.
 * @returns
 *  Starting address of the reserved area on success, NULL on failure.
 *  Callers must not access this memory until remapping it.
 */
void *
eal_mem_reserve(void *requested_addr, size_t size, int flags);

/**
 * Free memory obtained by eal_mem_reserve() and possibly allocated.
 *
 * If *virt* and *size* describe a part of the reserved region,
 * only this part of the region is freed (accurately up to the system
 * page size). If *virt* points to allocated memory, *size* must match
 * the one specified on allocation. The behavior is undefined
 * if the memory pointed by *virt* is obtained from another source
 * than listed above.
 *
 * @param virt
 *  A virtual address in a region previously reserved.
 * @param size
 *  Number of bytes to unreserve.
 */
void
eal_mem_free(void *virt, size_t size);

/**
 * Configure memory region inclusion into dumps.
 *
 * @param virt
 *  Starting address of the region.
 * @param size
 *  Size of the region.
 * @param dump
 *  True to include memory into dumps, false to exclude.
 * @return
 *  0 on success, (-1) on failure and rte_errno is set.
 */
int
eal_mem_set_dump(void *virt, size_t size, bool dump);

/**
 * Sets the runtime directory of DPDK
 *
 * @param run_dir
 *   The new runtime directory path of DPDK
 * @return
 *   0 on success, (-1) on failure.
 */
int
eal_set_runtime_dir(const char *run_dir);

/**
 * Get the internal configuration structure.
 *
 * @return
 *   A pointer to the internal configuration structure.
 */
struct internal_config *
eal_get_internal_configuration(void);

/**
 * Get the current value of the rte_application_usage pointer
 *
 * @return
 *   Pointer to the current value of rte_application_usage.
 */
rte_usage_hook_t
eal_get_application_usage_hook(void);

/**
 * Instruct primary process that a secondary process wants to attach.
 */
bool __rte_mp_enable(void);

/**
 * Init per-lcore info in current thread.
 *
 * @param lcore_id
 *   identifier of lcore.
 * @param cpuset
 *   CPU affinity for this thread.
 */
void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);

/**
 * Uninitialize per-lcore info for current thread.
 */
void __rte_thread_uninit(void);

/**
 * asprintf(3) replacement for Windows.
 */
#ifdef RTE_EXEC_ENV_WINDOWS
__rte_format_printf(2, 3)
int eal_asprintf(char **buffer, const char *format, ...);

#define asprintf(buffer, format, ...) \
		eal_asprintf(buffer, format, ##__VA_ARGS__)
#endif

#endif /* _EAL_PRIVATE_H_ */