linux/samples/bpf/test_lru_dist.c
/*
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
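/*
 * Exercises BPF_MAP_TYPE_LRU_HASH with a key trace read from a file and
 * compares its behaviour against an exact, user-space "perfect" LRU.
 *
 * Usage (see main() below):
 *     test_lru_dist <dist-file> <lru-size> <nr-tasks>
 *
 * <dist-file> contains one decimal key per line (see read_keys()).
 */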
#define _GNU_SOURCE
#include <linux/types.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>

#include <bpf/bpf.h>
#include "bpf_util.h"

#define min(a, b) ((a) < (b) ? (a) : (b))
#ifndef offsetof
# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif
#define container_of(ptr, type, member) ({                      \
        const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
        (type *)( (char *)__mptr - offsetof(type,member) );})

static int nr_cpus;
static unsigned long long *dist_keys;
static unsigned int dist_key_counts;

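/*
 * Minimal doubly-linked list helpers in the style of the kernel's
 * <linux/list.h>, used by the user-space reference LRU below.
 */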
struct list_head {
        struct list_head *next, *prev;
};

static inline void INIT_LIST_HEAD(struct list_head *list)
{
        list->next = list;
        list->prev = list;
}

static inline int list_empty(const struct list_head *head)
{
        return head->next == head;
}

static inline void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
{
        next->prev = new;
        new->next = next;
        new->prev = prev;
        prev->next = new;
}

static inline void list_add(struct list_head *new, struct list_head *head)
{
        __list_add(new, head, head->next);
}

static inline void __list_del(struct list_head *prev, struct list_head *next)
{
        next->prev = prev;
        prev->next = next;
}

static inline void __list_del_entry(struct list_head *entry)
{
        __list_del(entry->prev, entry->next);
}

static inline void list_move(struct list_head *list, struct list_head *head)
{
        __list_del_entry(list);
        list_add(list, head);
}

#define list_entry(ptr, type, member) \
        container_of(ptr, type, member)

#define list_last_entry(ptr, type, member) \
        list_entry((ptr)->prev, type, member)

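/*
 * User-space "perfect" LRU used as a reference: exact recency order is kept
 * on a list, and a plain BPF hash map maps each key to its node (or to NULL
 * once the key has been evicted, so repeat keys can still be recognized).
 */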
struct pfect_lru_node {
        struct list_head list;
        unsigned long long key;
};

struct pfect_lru {
        struct list_head list;
        struct pfect_lru_node *free_nodes;
        unsigned int cur_size;
        unsigned int lru_size;
        unsigned int nr_unique;
        unsigned int nr_misses;
        unsigned int total;
        int map_fd;
};

static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
                           unsigned int nr_possible_elems)
{
        lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
                                     sizeof(unsigned long long),
                                     sizeof(struct pfect_lru_node *),
                                     nr_possible_elems, 0);
        assert(lru->map_fd != -1);

        lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
        assert(lru->free_nodes);

        INIT_LIST_HEAD(&lru->list);
        lru->cur_size = 0;
        lru->lru_size = lru_size;
        lru->nr_unique = lru->nr_misses = lru->total = 0;
}

static void pfect_lru_destroy(struct pfect_lru *lru)
{
        close(lru->map_fd);
        free(lru->free_nodes);
}

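/*
 * Perfect-LRU access: returns nonzero if @key has been seen before, zero on
 * first access.  Counts a miss whenever the key is not currently resident;
 * when the LRU is full, the least-recently-used node is recycled and its old
 * key's map value is set to NULL so that key remains "seen".
 */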
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
                                      unsigned long long key)
{
        struct pfect_lru_node *node = NULL;
        int seen = 0;

        lru->total++;
        if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
                if (node) {
                        list_move(&node->list, &lru->list);
                        return 1;
                }
                seen = 1;
        }

        if (lru->cur_size < lru->lru_size) {
                node = &lru->free_nodes[lru->cur_size++];
                INIT_LIST_HEAD(&node->list);
        } else {
                struct pfect_lru_node *null_node = NULL;

                node = list_last_entry(&lru->list,
                                       struct pfect_lru_node,
                                       list);
                bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
        }

        node->key = key;
        list_move(&node->list, &lru->list);

        lru->nr_misses++;
        if (seen) {
                assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
        } else {
                lru->nr_unique++;
                assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
        }

        return seen;
}

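/*
 * Load the key trace from @dist_file: one decimal key per line.  Returns the
 * number of keys read and hands back a malloc()ed array via @keys.
 */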
static unsigned int read_keys(const char *dist_file,
                              unsigned long long **keys)
{
        struct stat fst;
        unsigned long long *retkeys;
        unsigned int counts = 0;
        int dist_fd;
        char *b, *l;
        int i;

        dist_fd = open(dist_file, 0);
        assert(dist_fd != -1);

        assert(fstat(dist_fd, &fst) == 0);
        b = malloc(fst.st_size + 1);
        assert(b);

        assert(read(dist_fd, b, fst.st_size) == fst.st_size);
        close(dist_fd);
        b[fst.st_size] = '\0'; /* strtok() below needs a NUL-terminated buffer */
        for (i = 0; i < fst.st_size; i++) {
                if (b[i] == '\n')
                        counts++;
        }
        counts++; /* in case the last line has no \n */

        retkeys = malloc(counts * sizeof(unsigned long long));
        assert(retkeys);

        counts = 0;
        for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
                retkeys[counts++] = strtoull(l, NULL, 10);
        free(b);

        *keys = retkeys;

        return counts;
}

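/* Create a u64 -> u64 map of the requested type, flags and size. */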
static int create_map(int map_type, int map_flags, unsigned int size)
{
        int map_fd;

        map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
                                sizeof(unsigned long long), size, map_flags);

        if (map_fd == -1)
                perror("bpf_create_map");

        return map_fd;
}

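/*
 * Pin @pid (0 means the calling process) to the next online CPU at or after
 * @next_to_try.  Returns the next CPU index to try, or -1 when no CPUs are
 * left.
 */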
static int sched_next_online(int pid, int next_to_try)
{
        cpu_set_t cpuset;

        if (next_to_try == nr_cpus)
                return -1;

        while (next_to_try < nr_cpus) {
                CPU_ZERO(&cpuset);
                CPU_SET(next_to_try++, &cpuset);
                if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
                        break;
        }

        return next_to_try;
}

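/*
 * Fork @tasks child processes, pin each one to its own CPU and run
 * fn(task_index, data) in it.  The parent waits for every child and asserts
 * that it exited cleanly.
 */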
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
                         void *data)
{
        int next_sched_cpu = 0;
        pid_t pid[tasks];
        int i;

        for (i = 0; i < tasks; i++) {
                pid[i] = fork();
                if (pid[i] == 0) {
                        next_sched_cpu = sched_next_online(0, next_sched_cpu);
                        fn(i, data);
                        exit(0);
                } else if (pid[i] == -1) {
                        printf("couldn't spawn #%d process\n", i);
                        exit(1);
                }
                /* This call is mostly redundant; it just lets the parent
                 * process advance next_sched_cpu for the next child
                 * process.
                 */
                next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
        }
        for (i = 0; i < tasks; i++) {
                int status;

                assert(waitpid(pid[i], &status, 0) == pid[i]);
                assert(status == 0);
        }
}

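/*
 * Per-task body of the distribution test: replay the key trace (offset per
 * task so tasks do not share keys) against both the kernel LRU map and the
 * user-space perfect LRU, then report the miss counts of each.
 */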
static void do_test_lru_dist(int task, void *data)
{
        unsigned int nr_misses = 0;
        struct pfect_lru pfect_lru;
        unsigned long long key, value = 1234;
        unsigned int i;

        unsigned int lru_map_fd = ((unsigned int *)data)[0];
        unsigned int lru_size = ((unsigned int *)data)[1];
        unsigned long long key_offset = task * dist_key_counts;

        pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);

        for (i = 0; i < dist_key_counts; i++) {
                key = dist_keys[i] + key_offset;

                pfect_lru_lookup_or_insert(&pfect_lru, key);

                if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
                        continue;

                if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
                        printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
                               key, errno);
                        assert(0);
                }

                nr_misses++;
        }

        printf("    task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
               task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
               dist_key_counts);
        printf("    task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
               task, pfect_lru.nr_unique, pfect_lru.total,
               pfect_lru.nr_misses, pfect_lru.total);

        pfect_lru_destroy(&pfect_lru);
        close(lru_map_fd);
}

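/*
 * Create one shared LRU map (sized per CPU when BPF_F_NO_COMMON_LRU is set,
 * per task otherwise) and run the distribution test in nr_tasks parallel
 * processes.
 */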
static void test_parallel_lru_dist(int map_type, int map_flags,
                                   int nr_tasks, unsigned int lru_size)
{
        int child_data[2];
        int lru_map_fd;

        printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
               map_flags);

        if (map_flags & BPF_F_NO_COMMON_LRU)
                lru_map_fd = create_map(map_type, map_flags,
                                        nr_cpus * lru_size);
        else
                lru_map_fd = create_map(map_type, map_flags,
                                        nr_tasks * lru_size);
        assert(lru_map_fd != -1);

        child_data[0] = lru_map_fd;
        child_data[1] = lru_size;

        run_parallel(nr_tasks, do_test_lru_dist, child_data);

        close(lru_map_fd);
}

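/*
 * Insert keys 1..1000 into a 900-entry LRU while re-referencing keys
 * 101..900 after every insert, then count which elements got lost: the idle
 * "older" keys (1..100), the actively used keys (101..900) or the "newer"
 * keys (901..1000).
 */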
static void test_lru_loss0(int map_type, int map_flags)
{
        unsigned long long key, value[nr_cpus];
        unsigned int old_unused_losses = 0;
        unsigned int new_unused_losses = 0;
        unsigned int used_losses = 0;
        int map_fd;

        printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
               map_flags);

        assert(sched_next_online(0, 0) != -1);

        if (map_flags & BPF_F_NO_COMMON_LRU)
                map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
        else
                map_fd = create_map(map_type, map_flags, 900);

        assert(map_fd != -1);

        value[0] = 1234;

        for (key = 1; key <= 1000; key++) {
                /* the map's keys are 8 bytes wide, so look up with the
                 * matching type
                 */
                unsigned long long start_key, end_key;

                assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);

                start_key = 101;
                end_key = min(key, 900);

                while (start_key <= end_key) {
                        bpf_map_lookup_elem(map_fd, &start_key, value);
                        start_key++;
                }
        }

        for (key = 1; key <= 1000; key++) {
                if (bpf_map_lookup_elem(map_fd, &key, value)) {
                        if (key <= 100)
                                old_unused_losses++;
                        else if (key <= 900)
                                used_losses++;
                        else
                                new_unused_losses++;
                }
        }

        close(map_fd);

        printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
               "newer-elem-losses:%d(/100)\n",
               old_unused_losses, used_losses, new_unused_losses);
}

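/*
 * Insert exactly 1000 keys into a 1000-entry LRU and count how many are
 * missing afterwards; any loss means elements were evicted even though the
 * map was sized to hold them all.
 */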
static void test_lru_loss1(int map_type, int map_flags)
{
        unsigned long long key, value[nr_cpus];
        int map_fd;
        unsigned int nr_losses = 0;

        printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
               map_flags);

        assert(sched_next_online(0, 0) != -1);

        if (map_flags & BPF_F_NO_COMMON_LRU)
                map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
        else
                map_fd = create_map(map_type, map_flags, 1000);

        assert(map_fd != -1);

        value[0] = 1234;

        for (key = 1; key <= 1000; key++)
                assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));

        for (key = 1; key <= 1000; key++) {
                if (bpf_map_lookup_elem(map_fd, &key, value))
                        nr_losses++;
        }

        close(map_fd);

        printf("nr_losses:%d(/1000)\n", nr_losses);
}

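/*
 * Per-task loss test: install 1000 "stable" keys, then perform 100000 random
 * operations (roughly 90% lookups of stable keys, 10% inserts of fresh keys)
 * and finally count how many of the stable keys were evicted.
 */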
static void do_test_parallel_lru_loss(int task, void *data)
{
        const unsigned int nr_stable_elems = 1000;
        const unsigned int nr_repeats = 100000;

        int map_fd = *(int *)data;
        unsigned long long stable_base;
        unsigned long long key, value[nr_cpus];
        unsigned long long next_ins_key;
        unsigned int nr_losses = 0;
        unsigned int i;

        stable_base = task * nr_repeats * 2 + 1;
        next_ins_key = stable_base;
        value[0] = 1234;
        for (i = 0; i < nr_stable_elems; i++) {
                assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
                                           BPF_NOEXIST) == 0);
                next_ins_key++;
        }

        for (i = 0; i < nr_repeats; i++) {
                int rn;

                rn = rand();

                if (rn % 10) {
                        key = rn % nr_stable_elems + stable_base;
                        bpf_map_lookup_elem(map_fd, &key, value);
                } else {
                        bpf_map_update_elem(map_fd, &next_ins_key, value,
                                            BPF_NOEXIST);
                        next_ins_key++;
                }
        }

        key = stable_base;
        for (i = 0; i < nr_stable_elems; i++) {
                if (bpf_map_lookup_elem(map_fd, &key, value))
                        nr_losses++;
                key++;
        }

        printf("    task:%d nr_losses:%u\n", task, nr_losses);
}

static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
{
        int map_fd;

        printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
               map_flags);

        /* Give 20% more than the active working set */
        if (map_flags & BPF_F_NO_COMMON_LRU)
                map_fd = create_map(map_type, map_flags,
                                    nr_cpus * (1000 + 200));
        else
                map_fd = create_map(map_type, map_flags,
                                    nr_tasks * (1000 + 200));

        assert(map_fd != -1);

        run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);

        close(map_fd);
}

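/*
 * Parse <dist-file> <lru-size> <nr-tasks>, raise RLIMIT_MEMLOCK so map
 * creation can succeed, and run every test for both the common LRU and the
 * per-CPU (BPF_F_NO_COMMON_LRU) flavour of BPF_MAP_TYPE_LRU_HASH.
 */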
int main(int argc, char **argv)
{
        struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
        const char *dist_file;
        int nr_tasks = 1;
        int lru_size;
        int f;

        if (argc < 4) {
                printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
                       argv[0]);
                return -1;
        }

        dist_file = argv[1];
        lru_size = atoi(argv[2]);
        nr_tasks = atoi(argv[3]);

        setbuf(stdout, NULL);

        assert(!setrlimit(RLIMIT_MEMLOCK, &r));

        srand(time(NULL));

        nr_cpus = bpf_num_possible_cpus();
        assert(nr_cpus != -1);
        printf("nr_cpus:%d\n\n", nr_cpus);

        nr_tasks = min(nr_tasks, nr_cpus);

        dist_key_counts = read_keys(dist_file, &dist_keys);
        if (!dist_key_counts) {
                printf("%s has no key\n", dist_file);
                return -1;
        }

        for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
                test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
                test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
                test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
                                       nr_tasks);
                test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
                                       nr_tasks, lru_size);
                printf("\n");
        }

        free(dist_keys);

        return 0;
}