dpdk/lib/eal/linux/eal_hugepage_info.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2014 Intel Corporation
   3 */
   4
   5#include <string.h>
   6#include <sys/types.h>
   7#include <sys/file.h>
   8#include <dirent.h>
   9#include <fcntl.h>
  10#include <stdint.h>
  11#include <stdlib.h>
  12#include <stdio.h>
  13#include <fnmatch.h>
  14#include <inttypes.h>
  15#include <stdarg.h>
  16#include <unistd.h>
  17#include <errno.h>
  18#include <sys/mman.h>
  19#include <sys/queue.h>
  20#include <sys/stat.h>
  21
  22#include <linux/mman.h> /* for hugetlb-related flags */
  23
  24#include <rte_memory.h>
  25#include <rte_eal.h>
  26#include <rte_launch.h>
  27#include <rte_per_lcore.h>
  28#include <rte_lcore.h>
  29#include <rte_debug.h>
  30#include <rte_log.h>
  31#include <rte_common.h>
  32#include "rte_string_fns.h"
  33
  34#include "eal_private.h"
  35#include "eal_internal_cfg.h"
  36#include "eal_hugepages.h"
  37#include "eal_filesystem.h"
  38
  39static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
  40static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node";
  41
  42/*
  43 * Uses mmap to create a shared memory area for storage of data
  44 * Used in this file to store the hugepage file map on disk
  45 */
  46static void *
  47map_shared_memory(const char *filename, const size_t mem_size, int flags)
  48{
  49        void *retval;
  50        int fd = open(filename, flags, 0600);
  51        if (fd < 0)
  52                return NULL;
  53        if (ftruncate(fd, mem_size) < 0) {
  54                close(fd);
  55                return NULL;
  56        }
  57        retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
  58                        MAP_SHARED, fd, 0);
  59        close(fd);
  60        return retval;
  61}
  62
  63static void *
  64open_shared_memory(const char *filename, const size_t mem_size)
  65{
  66        return map_shared_memory(filename, mem_size, O_RDWR);
  67}
  68
  69static void *
  70create_shared_memory(const char *filename, const size_t mem_size)
  71{
  72        return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
  73}
  74
  75static int get_hp_sysfs_value(const char *subdir, const char *file, unsigned long *val)
  76{
  77        char path[PATH_MAX];
  78
  79        snprintf(path, sizeof(path), "%s/%s/%s",
  80                        sys_dir_path, subdir, file);
  81        return eal_parse_sysfs_value(path, val);
  82}
  83
  84/* this function is only called from eal_hugepage_info_init which itself
  85 * is only called from a primary process */
  86static uint32_t
  87get_num_hugepages(const char *subdir, size_t sz)
  88{
  89        unsigned long resv_pages, num_pages, over_pages, surplus_pages;
  90        const char *nr_hp_file = "free_hugepages";
  91        const char *nr_rsvd_file = "resv_hugepages";
  92        const char *nr_over_file = "nr_overcommit_hugepages";
  93        const char *nr_splus_file = "surplus_hugepages";
  94
  95        /* first, check how many reserved pages kernel reports */
  96        if (get_hp_sysfs_value(subdir, nr_rsvd_file, &resv_pages) < 0)
  97                return 0;
  98
  99        if (get_hp_sysfs_value(subdir, nr_hp_file, &num_pages) < 0)
 100                return 0;
 101
 102        if (get_hp_sysfs_value(subdir, nr_over_file, &over_pages) < 0)
 103                over_pages = 0;
 104
 105        if (get_hp_sysfs_value(subdir, nr_splus_file, &surplus_pages) < 0)
 106                surplus_pages = 0;
 107
 108        /* adjust num_pages */
 109        if (num_pages >= resv_pages)
 110                num_pages -= resv_pages;
 111        else if (resv_pages)
 112                num_pages = 0;
 113
 114        if (over_pages >= surplus_pages)
 115                over_pages -= surplus_pages;
 116        else
 117                over_pages = 0;
 118
 119        if (num_pages == 0 && over_pages == 0)
 120                RTE_LOG(WARNING, EAL, "No available %zu kB hugepages reported\n",
 121                                sz >> 10);
 122
 123        num_pages += over_pages;
 124        if (num_pages < over_pages) /* overflow */
 125                num_pages = UINT32_MAX;
 126
 127        /* we want to return a uint32_t and more than this looks suspicious
 128         * anyway ... */
 129        if (num_pages > UINT32_MAX)
 130                num_pages = UINT32_MAX;
 131
 132        return num_pages;
 133}
 134
 135static uint32_t
 136get_num_hugepages_on_node(const char *subdir, unsigned int socket, size_t sz)
 137{
 138        char path[PATH_MAX], socketpath[PATH_MAX];
 139        DIR *socketdir;
 140        unsigned long num_pages = 0;
 141        const char *nr_hp_file = "free_hugepages";
 142
 143        snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages",
 144                sys_pages_numa_dir_path, socket);
 145
 146        socketdir = opendir(socketpath);
 147        if (socketdir) {
 148                /* Keep calm and carry on */
 149                closedir(socketdir);
 150        } else {
 151                /* Can't find socket dir, so ignore it */
 152                return 0;
 153        }
 154
 155        snprintf(path, sizeof(path), "%s/%s/%s",
 156                        socketpath, subdir, nr_hp_file);
 157        if (eal_parse_sysfs_value(path, &num_pages) < 0)
 158                return 0;
 159
 160        if (num_pages == 0)
 161                RTE_LOG(WARNING, EAL, "No free %zu kB hugepages reported on node %u\n",
 162                                sz >> 10, socket);
 163
 164        /*
 165         * we want to return a uint32_t and more than this looks suspicious
 166         * anyway ...
 167         */
 168        if (num_pages > UINT32_MAX)
 169                num_pages = UINT32_MAX;
 170
 171        return num_pages;
 172}
 173
 174static uint64_t
 175get_default_hp_size(void)
 176{
 177        const char proc_meminfo[] = "/proc/meminfo";
 178        const char str_hugepagesz[] = "Hugepagesize:";
 179        unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1;
 180        char buffer[256];
 181        unsigned long long size = 0;
 182
 183        FILE *fd = fopen(proc_meminfo, "r");
 184        if (fd == NULL)
 185                rte_panic("Cannot open %s\n", proc_meminfo);
 186        while(fgets(buffer, sizeof(buffer), fd)){
 187                if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){
 188                        size = rte_str_to_size(&buffer[hugepagesz_len]);
 189                        break;
 190                }
 191        }
 192        fclose(fd);
 193        if (size == 0)
 194                rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo);
 195        return size;
 196}
 197
 198static int
 199get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int len)
 200{
 201        enum proc_mount_fieldnames {
 202                DEVICE = 0,
 203                MOUNTPT,
 204                FSTYPE,
 205                OPTIONS,
 206                _FIELDNAME_MAX
 207        };
 208        static uint64_t default_size = 0;
 209        const char proc_mounts[] = "/proc/mounts";
 210        const char hugetlbfs_str[] = "hugetlbfs";
 211        const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
 212        const char pagesize_opt[] = "pagesize=";
 213        const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
 214        const char split_tok = ' ';
 215        char *splitstr[_FIELDNAME_MAX];
 216        char buf[BUFSIZ];
 217        int retval = -1;
 218        const struct internal_config *internal_conf =
 219                eal_get_internal_configuration();
 220
 221        FILE *fd = fopen(proc_mounts, "r");
 222        if (fd == NULL)
 223                rte_panic("Cannot open %s\n", proc_mounts);
 224
 225        if (default_size == 0)
 226                default_size = get_default_hp_size();
 227
 228        while (fgets(buf, sizeof(buf), fd)){
 229                if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
 230                                split_tok) != _FIELDNAME_MAX) {
 231                        RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
 232                        break; /* return NULL */
 233                }
 234
 235                /* we have a specified --huge-dir option, only examine that dir */
 236                if (internal_conf->hugepage_dir != NULL &&
 237                                strcmp(splitstr[MOUNTPT], internal_conf->hugepage_dir) != 0)
 238                        continue;
 239
 240                if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
 241                        const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
 242
 243                        /* if no explicit page size, the default page size is compared */
 244                        if (pagesz_str == NULL){
 245                                if (hugepage_sz == default_size){
 246                                        strlcpy(hugedir, splitstr[MOUNTPT], len);
 247                                        retval = 0;
 248                                        break;
 249                                }
 250                        }
 251                        /* there is an explicit page size, so check it */
 252                        else {
 253                                uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
 254                                if (pagesz == hugepage_sz) {
 255                                        strlcpy(hugedir, splitstr[MOUNTPT], len);
 256                                        retval = 0;
 257                                        break;
 258                                }
 259                        }
 260                } /* end if strncmp hugetlbfs */
 261        } /* end while fgets */
 262
 263        fclose(fd);
 264        return retval;
 265}
 266
 267/*
 268 * Clear the hugepage directory of whatever hugepage files
 269 * there are. Checks if the file is locked (i.e.
 270 * if it's in use by another DPDK process).
 271 */
 272static int
 273clear_hugedir(const char * hugedir)
 274{
 275        DIR *dir;
 276        struct dirent *dirent;
 277        int dir_fd, fd, lck_result;
 278        const char filter[] = "*map_*"; /* matches hugepage files */
 279
 280        /* open directory */
 281        dir = opendir(hugedir);
 282        if (!dir) {
 283                RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n",
 284                                hugedir);
 285                goto error;
 286        }
 287        dir_fd = dirfd(dir);
 288
 289        dirent = readdir(dir);
 290        if (!dirent) {
 291                RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n",
 292                                hugedir);
 293                goto error;
 294        }
 295
 296        while(dirent != NULL){
 297                /* skip files that don't match the hugepage pattern */
 298                if (fnmatch(filter, dirent->d_name, 0) > 0) {
 299                        dirent = readdir(dir);
 300                        continue;
 301                }
 302
 303                /* try and lock the file */
 304                fd = openat(dir_fd, dirent->d_name, O_RDONLY);
 305
 306                /* skip to next file */
 307                if (fd == -1) {
 308                        dirent = readdir(dir);
 309                        continue;
 310                }
 311
 312                /* non-blocking lock */
 313                lck_result = flock(fd, LOCK_EX | LOCK_NB);
 314
 315                /* if lock succeeds, remove the file */
 316                if (lck_result != -1)
 317                        unlinkat(dir_fd, dirent->d_name, 0);
 318                close (fd);
 319                dirent = readdir(dir);
 320        }
 321
 322        closedir(dir);
 323        return 0;
 324
 325error:
 326        if (dir)
 327                closedir(dir);
 328
 329        RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n",
 330                strerror(errno));
 331
 332        return -1;
 333}
 334
 335static int
 336compare_hpi(const void *a, const void *b)
 337{
 338        const struct hugepage_info *hpi_a = a;
 339        const struct hugepage_info *hpi_b = b;
 340
 341        return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
 342}
 343
 344static void
 345calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
 346{
 347        uint64_t total_pages = 0;
 348        unsigned int i;
 349        const struct internal_config *internal_conf =
 350                eal_get_internal_configuration();
 351
 352        /*
 353         * first, try to put all hugepages into relevant sockets, but
 354         * if first attempts fails, fall back to collecting all pages
 355         * in one socket and sorting them later
 356         */
 357        total_pages = 0;
 358        /* we also don't want to do this for legacy init */
 359        if (!internal_conf->legacy_mem)
 360                for (i = 0; i < rte_socket_count(); i++) {
 361                        int socket = rte_socket_id_by_idx(i);
 362                        unsigned int num_pages =
 363                                        get_num_hugepages_on_node(
 364                                                dirent->d_name, socket,
 365                                                hpi->hugepage_sz);
 366                        hpi->num_pages[socket] = num_pages;
 367                        total_pages += num_pages;
 368                }
 369        /*
 370         * we failed to sort memory from the get go, so fall
 371         * back to old way
 372         */
 373        if (total_pages == 0) {
 374                hpi->num_pages[0] = get_num_hugepages(dirent->d_name,
 375                                hpi->hugepage_sz);
 376
 377#ifndef RTE_ARCH_64
 378                /* for 32-bit systems, limit number of hugepages to
 379                 * 1GB per page size */
 380                hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
 381                                RTE_PGSIZE_1G / hpi->hugepage_sz);
 382#endif
 383        }
 384}
 385
 386static int
 387hugepage_info_init(void)
 388{       const char dirent_start_text[] = "hugepages-";
 389        const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
 390        unsigned int i, num_sizes = 0;
 391        DIR *dir;
 392        struct dirent *dirent;
 393        struct internal_config *internal_conf =
 394                eal_get_internal_configuration();
 395
 396        dir = opendir(sys_dir_path);
 397        if (dir == NULL) {
 398                RTE_LOG(ERR, EAL,
 399                        "Cannot open directory %s to read system hugepage info\n",
 400                        sys_dir_path);
 401                return -1;
 402        }
 403
 404        for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
 405                struct hugepage_info *hpi;
 406
 407                if (strncmp(dirent->d_name, dirent_start_text,
 408                            dirent_start_len) != 0)
 409                        continue;
 410
 411                if (num_sizes >= MAX_HUGEPAGE_SIZES)
 412                        break;
 413
 414                hpi = &internal_conf->hugepage_info[num_sizes];
 415                hpi->hugepage_sz =
 416                        rte_str_to_size(&dirent->d_name[dirent_start_len]);
 417
 418                /* first, check if we have a mountpoint */
 419                if (get_hugepage_dir(hpi->hugepage_sz,
 420                        hpi->hugedir, sizeof(hpi->hugedir)) < 0) {
 421                        uint32_t num_pages;
 422
 423                        num_pages = get_num_hugepages(dirent->d_name,
 424                                        hpi->hugepage_sz);
 425                        if (num_pages > 0)
 426                                RTE_LOG(NOTICE, EAL,
 427                                        "%" PRIu32 " hugepages of size "
 428                                        "%" PRIu64 " reserved, but no mounted "
 429                                        "hugetlbfs found for that size\n",
 430                                        num_pages, hpi->hugepage_sz);
 431                        /* if we have kernel support for reserving hugepages
 432                         * through mmap, and we're in in-memory mode, treat this
 433                         * page size as valid. we cannot be in legacy mode at
 434                         * this point because we've checked this earlier in the
 435                         * init process.
 436                         */
 437#ifdef MAP_HUGE_SHIFT
 438                        if (internal_conf->in_memory) {
 439                                RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
 440                                        "hugepages of size %" PRIu64 " bytes "
 441                                        "will be allocated anonymously\n",
 442                                        hpi->hugepage_sz);
 443                                calc_num_pages(hpi, dirent);
 444                                num_sizes++;
 445                        }
 446#endif
 447                        continue;
 448                }
 449
 450                /* try to obtain a writelock */
 451                hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
 452
 453                /* if blocking lock failed */
 454                if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
 455                        RTE_LOG(CRIT, EAL,
 456                                "Failed to lock hugepage directory!\n");
 457                        break;
 458                }
 459                /* clear out the hugepages dir from unused pages */
 460                if (clear_hugedir(hpi->hugedir) == -1)
 461                        break;
 462
 463                calc_num_pages(hpi, dirent);
 464
 465                num_sizes++;
 466        }
 467        closedir(dir);
 468
 469        /* something went wrong, and we broke from the for loop above */
 470        if (dirent != NULL)
 471                return -1;
 472
 473        internal_conf->num_hugepage_sizes = num_sizes;
 474
 475        /* sort the page directory entries by size, largest to smallest */
 476        qsort(&internal_conf->hugepage_info[0], num_sizes,
 477              sizeof(internal_conf->hugepage_info[0]), compare_hpi);
 478
 479        /* now we have all info, check we have at least one valid size */
 480        for (i = 0; i < num_sizes; i++) {
 481                /* pages may no longer all be on socket 0, so check all */
 482                unsigned int j, num_pages = 0;
 483                struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
 484
 485                for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
 486                        num_pages += hpi->num_pages[j];
 487                if (num_pages > 0)
 488                        return 0;
 489        }
 490
 491        /* no valid hugepage mounts available, return error */
 492        return -1;
 493}
 494
 495/*
 496 * when we initialize the hugepage info, everything goes
 497 * to socket 0 by default. it will later get sorted by memory
 498 * initialization procedure.
 499 */
 500int
 501eal_hugepage_info_init(void)
 502{
 503        struct hugepage_info *hpi, *tmp_hpi;
 504        unsigned int i;
 505        struct internal_config *internal_conf =
 506                eal_get_internal_configuration();
 507
 508        if (hugepage_info_init() < 0)
 509                return -1;
 510
 511        /* for no shared files mode, we're done */
 512        if (internal_conf->no_shconf)
 513                return 0;
 514
 515        hpi = &internal_conf->hugepage_info[0];
 516
 517        tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
 518                        sizeof(internal_conf->hugepage_info));
 519        if (tmp_hpi == NULL) {
 520                RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
 521                return -1;
 522        }
 523
 524        memcpy(tmp_hpi, hpi, sizeof(internal_conf->hugepage_info));
 525
 526        /* we've copied file descriptors along with everything else, but they
 527         * will be invalid in secondary process, so overwrite them
 528         */
 529        for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
 530                struct hugepage_info *tmp = &tmp_hpi[i];
 531                tmp->lock_descriptor = -1;
 532        }
 533
 534        if (munmap(tmp_hpi, sizeof(internal_conf->hugepage_info)) < 0) {
 535                RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
 536                return -1;
 537        }
 538        return 0;
 539}
 540
 541int eal_hugepage_info_read(void)
 542{
 543        struct internal_config *internal_conf =
 544                eal_get_internal_configuration();
 545        struct hugepage_info *hpi = &internal_conf->hugepage_info[0];
 546        struct hugepage_info *tmp_hpi;
 547
 548        tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
 549                                  sizeof(internal_conf->hugepage_info));
 550        if (tmp_hpi == NULL) {
 551                RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
 552                return -1;
 553        }
 554
 555        memcpy(hpi, tmp_hpi, sizeof(internal_conf->hugepage_info));
 556
 557        if (munmap(tmp_hpi, sizeof(internal_conf->hugepage_info)) < 0) {
 558                RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
 559                return -1;
 560        }
 561        return 0;
 562}
 563