linux/kernel/bpf/syscall.c
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

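/* Table of map-type ops, populated via the "X macro" pattern: bpf_types.h
 * lists every program and map type as BPF_PROG_TYPE()/BPF_MAP_TYPE()
 * invocations, and each includer defines only the macro it cares about.
 * Here BPF_PROG_TYPE() expands to nothing, so only the map types land in
 * this array, indexed by their BPF_MAP_TYPE_* id.
 */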
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
        [_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
        struct bpf_map *map;

        if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
            !bpf_map_types[attr->map_type])
                return ERR_PTR(-EINVAL);

        map = bpf_map_types[attr->map_type]->map_alloc(attr);
        if (IS_ERR(map))
                return map;
        map->ops = bpf_map_types[attr->map_type];
        map->map_type = attr->map_type;
        return map;
}

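/* Allocation strategy note (illustrative): small areas are tried with
 * kmalloc() first and fall back to vmalloc(), since physically contiguous
 * memory above the "costly" order is often unavailable under fragmentation.
 * Assuming 4 KiB pages and PAGE_ALLOC_COSTLY_ORDER == 3, the kmalloc()
 * fast path covers sizes up to PAGE_SIZE << 3 = 32 KiB.
 */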
void *bpf_map_area_alloc(size_t size)
{
        /* We definitely need __GFP_NORETRY, so OOM killer doesn't
         * trigger under memory pressure as we really just want to
         * fail instead.
         */
        const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
        void *area;

        if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
                area = kmalloc(size, GFP_USER | flags);
                if (area != NULL)
                        return area;
        }

        return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
        kvfree(area);
}

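/* Note: this only pre-checks the current task's RLIMIT_MEMLOCK headroom;
 * nothing is reserved here. The actual accounting is done later by
 * bpf_map_charge_memlock(), so this check can race with other charges.
 */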
int bpf_map_precharge_memlock(u32 pages)
{
        struct user_struct *user = get_current_user();
        unsigned long memlock_limit, cur;

        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        cur = atomic_long_read(&user->locked_vm);
        free_uid(user);
        if (cur + pages > memlock_limit)
                return -EPERM;
        return 0;
}

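/* Unlike the precharge above, this charges first and checks the limit
 * afterwards, undoing the add on failure. Doing the atomic add before
 * the test means concurrent charges cannot both slip under the limit.
 */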
static int bpf_map_charge_memlock(struct bpf_map *map)
{
        struct user_struct *user = get_current_user();
        unsigned long memlock_limit;

        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        atomic_long_add(map->pages, &user->locked_vm);

        if (atomic_long_read(&user->locked_vm) > memlock_limit) {
                atomic_long_sub(map->pages, &user->locked_vm);
                free_uid(user);
                return -EPERM;
        }
        map->user = user;
        return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
        struct user_struct *user = map->user;

        atomic_long_sub(map->pages, &user->locked_vm);
        free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
        struct bpf_map *map = container_of(work, struct bpf_map, work);

        bpf_map_uncharge_memlock(map);
        /* implementation dependent freeing */
        map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
        if (atomic_dec_and_test(&map->usercnt)) {
                if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
                        bpf_fd_array_map_clear(map);
        }
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (the underlying map implementation's ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
        if (atomic_dec_and_test(&map->refcnt)) {
                INIT_WORK(&map->work, bpf_map_free_deferred);
                schedule_work(&map->work);
        }
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
        bpf_map_put_uref(map);
        bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
        struct bpf_map *map = filp->private_data;

        if (map->ops->map_release)
                map->ops->map_release(map, filp);

        bpf_map_put_with_uref(map);
        return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
        const struct bpf_map *map = filp->private_data;
        const struct bpf_array *array;
        u32 owner_prog_type = 0;

        if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
                array = container_of(map, struct bpf_array, map);
                owner_prog_type = array->owner_prog_type;
        }

        seq_printf(m,
                   "map_type:\t%u\n"
                   "key_size:\t%u\n"
                   "value_size:\t%u\n"
                   "max_entries:\t%u\n"
                   "map_flags:\t%#x\n"
                   "memlock:\t%llu\n",
                   map->map_type,
                   map->key_size,
                   map->value_size,
                   map->max_entries,
                   map->map_flags,
                   map->pages * 1ULL << PAGE_SHIFT);

        if (owner_prog_type)
                seq_printf(m, "owner_prog_type:\t%u\n",
                           owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_map_show_fdinfo,
#endif
        .release        = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
        return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
                                O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
        memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
                   sizeof(attr->CMD##_LAST_FIELD), 0, \
                   sizeof(*attr) - \
                   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                   sizeof(attr->CMD##_LAST_FIELD)) != NULL
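
/* Worked example: with BPF_MAP_CREATE_LAST_FIELD == inner_map_fd,
 * CHECK_ATTR(BPF_MAP_CREATE) scans from the first byte past
 * attr->inner_map_fd to the end of 'union bpf_attr' and evaluates to
 * true (reject) if any of those trailing bytes is non-zero. This is
 * what lets new fields be added to the union later: old commands keep
 * working only while the fields they don't know about stay zero.
 */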

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
        struct bpf_map *map;
        int err;

        err = CHECK_ATTR(BPF_MAP_CREATE);
        if (err)
                return -EINVAL;

        /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
        map = find_and_alloc_map(attr);
        if (IS_ERR(map))
                return PTR_ERR(map);

        atomic_set(&map->refcnt, 1);
        atomic_set(&map->usercnt, 1);

        err = bpf_map_charge_memlock(map);
        if (err)
                goto free_map_nouncharge;

        err = bpf_map_new_fd(map);
        if (err < 0)
                /* failed to allocate fd */
                goto free_map;

        trace_bpf_map_create(map, err);
        return err;

free_map:
        bpf_map_uncharge_memlock(map);
free_map_nouncharge:
        map->ops->map_free(map);
        return err;
}
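
/* Illustrative user-space usage (a sketch, not part of this file's build):
 *
 *      union bpf_attr attr = {
 *              .map_type    = BPF_MAP_TYPE_ARRAY,
 *              .key_size    = sizeof(__u32),
 *              .value_size  = sizeof(__u64),
 *              .max_entries = 256,
 *      };
 *      int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * On success the returned fd owns one refcnt/usercnt reference; closing
 * it drops through bpf_map_release() above.
 */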

/* If an error is returned, the fd is released.
 * On success the caller should complete fd access with a matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
        if (!f.file)
                return ERR_PTR(-EBADF);
        if (f.file->f_op != &bpf_map_fops) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
        if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
                atomic_dec(&map->refcnt);
                return ERR_PTR(-EBUSY);
        }
        if (uref)
                atomic_inc(&map->usercnt);
        return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
        struct fd f = fdget(ufd);
        struct bpf_map *map;

        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return map;

        map = bpf_map_inc(map, true);
        fdput(f);

        return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
        return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value, *ptr;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

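        /* Per-cpu maps return one value per possible CPU, each slot
         * rounded up to 8 bytes. E.g. with value_size == 12 and 4
         * possible CPUs, user space must supply 4 * 16 = 64 bytes.
         */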
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                err = bpf_stackmap_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
                   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
                err = -ENOTSUPP;
        } else {
                rcu_read_lock();
                ptr = map->ops->map_lookup_elem(map, key);
                if (ptr)
                        memcpy(value, ptr, value_size);
                rcu_read_unlock();
                err = ptr ? 0 : -ENOENT;
        }

        if (err)
                goto free_value;

        err = -EFAULT;
        if (copy_to_user(uvalue, value, value_size) != 0)
                goto free_value;

        trace_bpf_map_lookup_elem(map, ufd, key, value);
        err = 0;

free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
                value_size = map->value_size;

        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;

        err = -EFAULT;
        if (copy_from_user(value, uvalue, value_size) != 0)
                goto free_value;

        /* must increment bpf_prog_active to avoid a kprobe+bpf program
         * triggering from inside this map update or delete; otherwise
         * deadlocks are possible
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
                   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
                rcu_read_lock();
                err = bpf_fd_array_map_update_elem(map, f.file, key, value,
                                                   attr->flags);
                rcu_read_unlock();
        } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
                rcu_read_lock();
                err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
                                                  attr->flags);
                rcu_read_unlock();
        } else {
                rcu_read_lock();
                err = map->ops->map_update_elem(map, key, value, attr->flags);
                rcu_read_unlock();
        }
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

        if (!err)
                trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        struct fd f;
        void *key;
        int err;

        if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        rcu_read_lock();
        err = map->ops->map_delete_elem(map, key);
        rcu_read_unlock();
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();

        if (!err)
                trace_bpf_map_delete_elem(map, ufd, key);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *unext_key = u64_to_user_ptr(attr->next_key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *next_key;
        struct fd f;
        int err;

        if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
                return -EINVAL;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        if (ukey) {
                err = -ENOMEM;
                key = kmalloc(map->key_size, GFP_USER);
                if (!key)
                        goto err_put;

                err = -EFAULT;
                if (copy_from_user(key, ukey, map->key_size) != 0)
                        goto free_key;
        } else {
                key = NULL;
        }

        err = -ENOMEM;
        next_key = kmalloc(map->key_size, GFP_USER);
        if (!next_key)
                goto free_key;

        rcu_read_lock();
        err = map->ops->map_get_next_key(map, key, next_key);
        rcu_read_unlock();
        if (err)
                goto free_next_key;

        err = -EFAULT;
        if (copy_to_user(unext_key, next_key, map->key_size) != 0)
                goto free_next_key;

        trace_bpf_map_next_key(map, ufd, key, next_key);
        err = 0;

free_next_key:
        kfree(next_key);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
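
/* Illustrative user-space iteration idiom (a sketch): passing a NULL
 * key (attr->key == 0) yields the map's first key, and feeding each
 * returned key back in walks the whole map:
 *
 *      char key[KEY_SIZE], next[KEY_SIZE];     // KEY_SIZE: hypothetical
 *      void *cur = NULL;
 *      while (bpf_get_next_key(map_fd, cur, next) == 0) {
 *              // ... use next ...
 *              memcpy(key, next, sizeof(key));
 *              cur = key;
 *      }
 *
 * where bpf_get_next_key() is a hypothetical wrapper around
 * syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, ...).
 */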

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
        [_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
        if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
                return -EINVAL;

        prog->aux->ops = bpf_prog_types[type];
        prog->type = type;
        return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
        int i;

        for (i = 0; i < aux->used_map_cnt; i++)
                bpf_map_put(aux->used_maps[i]);

        kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
        unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        unsigned long user_bufs;

        if (user) {
                user_bufs = atomic_long_add_return(pages, &user->locked_vm);
                if (user_bufs > memlock_limit) {
                        atomic_long_sub(pages, &user->locked_vm);
                        return -EPERM;
                }
        }

        return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
        if (user)
                atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
        struct user_struct *user = get_current_user();
        int ret;

        ret = __bpf_prog_charge(user, prog->pages);
        if (ret) {
                free_uid(user);
                return ret;
        }

        prog->aux->user = user;
        return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
        struct user_struct *user = prog->aux->user;

        __bpf_prog_uncharge(user, prog->pages);
        free_uid(user);
}

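/* Freeing is deferred by one RCU grace period: BPF programs execute
 * under rcu_read_lock(), so by the time call_rcu()'s callback runs,
 * no CPU can still be inside this program or the maps it references.
 */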
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
        struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

        free_used_maps(aux);
        bpf_prog_uncharge_memlock(aux->prog);
        bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
        if (atomic_dec_and_test(&prog->aux->refcnt)) {
                trace_bpf_prog_put_rcu(prog);
                bpf_prog_kallsyms_del(prog);
                call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
        }
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
        struct bpf_prog *prog = filp->private_data;

        bpf_prog_put(prog);
        return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
        const struct bpf_prog *prog = filp->private_data;
        char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

        bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
        seq_printf(m,
                   "prog_type:\t%u\n"
                   "prog_jited:\t%u\n"
                   "prog_tag:\t%s\n"
                   "memlock:\t%llu\n",
                   prog->type,
                   prog->jited,
                   prog_tag,
                   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_prog_show_fdinfo,
#endif
        .release        = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
        return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
                                O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
        if (!f.file)
                return ERR_PTR(-EBADF);
        if (f.file->f_op != &bpf_prog_fops) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
        if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
                atomic_sub(i, &prog->aux->refcnt);
                return ERR_PTR(-EBUSY);
        }
        return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
        /* Only to be used for undoing previous bpf_prog_add() in some
         * error path. We still know that another entity in our call
         * path holds a reference to the program, thus atomic_sub() can
         * be safely used in such cases!
         */
        WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
        return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
        struct fd f = fdget(ufd);
        struct bpf_prog *prog;

        prog = ____bpf_prog_get(f);
        if (IS_ERR(prog))
                return prog;
        if (type && prog->type != *type) {
                prog = ERR_PTR(-EINVAL);
                goto out;
        }

        prog = bpf_prog_inc(prog);
out:
        fdput(f);
        return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
        return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
        struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

        if (!IS_ERR(prog))
                trace_bpf_prog_get_type(prog);
        return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_flags

static int bpf_prog_load(union bpf_attr *attr)
{
        enum bpf_prog_type type = attr->prog_type;
        struct bpf_prog *prog;
        int err;
        char license[128];
        bool is_gpl;

        if (CHECK_ATTR(BPF_PROG_LOAD))
                return -EINVAL;

        if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
                return -EINVAL;

        /* copy eBPF program license from user space */
        if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
                              sizeof(license) - 1) < 0)
                return -EFAULT;
        license[sizeof(license) - 1] = 0;

        /* eBPF programs must be GPL compatible to use GPL-ed functions */
        is_gpl = license_is_gpl_compatible(license);

        if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
                return -E2BIG;

        if (type == BPF_PROG_TYPE_KPROBE &&
            attr->kern_version != LINUX_VERSION_CODE)
                return -EINVAL;

        if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* plain bpf_prog allocation */
        prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
        if (!prog)
                return -ENOMEM;

        err = bpf_prog_charge_memlock(prog);
        if (err)
                goto free_prog_nouncharge;

        prog->len = attr->insn_cnt;

        err = -EFAULT;
        if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
                           bpf_prog_insn_size(prog)) != 0)
                goto free_prog;

        prog->orig_prog = NULL;
        prog->jited = 0;

        atomic_set(&prog->aux->refcnt, 1);
        prog->gpl_compatible = is_gpl ? 1 : 0;

        /* find program type: socket_filter vs tracing_filter */
        err = find_prog_type(type, prog);
        if (err < 0)
                goto free_prog;

        /* run eBPF verifier */
        err = bpf_check(&prog, attr);
        if (err < 0)
                goto free_used_maps;

        /* eBPF program is ready to be JITed */
        prog = bpf_prog_select_runtime(prog, &err);
        if (err < 0)
                goto free_used_maps;

        err = bpf_prog_new_fd(prog);
        if (err < 0)
                /* failed to allocate fd */
                goto free_used_maps;

        bpf_prog_kallsyms_add(prog);
        trace_bpf_prog_load(prog, err);
        return err;

free_used_maps:
        free_used_maps(prog->aux);
free_prog:
        bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
        bpf_prog_free(prog);
        return err;
}
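
/* Illustrative user-space load sketch (assumes the BPF_MOV64_IMM()/
 * BPF_EXIT_INSN() helper macros, e.g. from the selftests headers; not
 * part of this file's build):
 *
 *      struct bpf_insn insns[] = {
 *              BPF_MOV64_IMM(BPF_REG_0, 0),    // r0 = 0 (return value)
 *              BPF_EXIT_INSN(),
 *      };
 *      union bpf_attr attr = {
 *              .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *              .insn_cnt  = 2,
 *              .insns     = (__u64)(unsigned long)insns,
 *              .license   = (__u64)(unsigned long)"GPL",
 *      };
 *      int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */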

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
        if (CHECK_ATTR(BPF_OBJ))
                return -EINVAL;

        return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
        if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
                return -EINVAL;

        return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}
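
/* BPF_OBJ_PIN/BPF_OBJ_GET attach map and prog objects to paths in the
 * BPF filesystem (conventionally mounted at /sys/fs/bpf), so objects
 * can outlive the process that created their file descriptors.
 */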

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
        enum bpf_prog_type ptype;
        struct bpf_prog *prog;
        struct cgroup *cgrp;
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        if (CHECK_ATTR(BPF_PROG_ATTACH))
                return -EINVAL;

        if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
                return -EINVAL;

        switch (attr->attach_type) {
        case BPF_CGROUP_INET_INGRESS:
        case BPF_CGROUP_INET_EGRESS:
                ptype = BPF_PROG_TYPE_CGROUP_SKB;
                break;
        case BPF_CGROUP_INET_SOCK_CREATE:
                ptype = BPF_PROG_TYPE_CGROUP_SOCK;
                break;
        default:
                return -EINVAL;
        }

        prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
        if (IS_ERR(prog))
                return PTR_ERR(prog);

        cgrp = cgroup_get_from_fd(attr->target_fd);
        if (IS_ERR(cgrp)) {
                bpf_prog_put(prog);
                return PTR_ERR(cgrp);
        }

        ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
                                attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
        if (ret)
                bpf_prog_put(prog);
        cgroup_put(cgrp);

        return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
        struct cgroup *cgrp;
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        if (CHECK_ATTR(BPF_PROG_DETACH))
                return -EINVAL;

        switch (attr->attach_type) {
        case BPF_CGROUP_INET_INGRESS:
        case BPF_CGROUP_INET_EGRESS:
        case BPF_CGROUP_INET_SOCK_CREATE:
                cgrp = cgroup_get_from_fd(attr->target_fd);
                if (IS_ERR(cgrp))
                        return PTR_ERR(cgrp);

                ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
                cgroup_put(cgrp);
                break;

        default:
                return -EINVAL;
        }

        return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
                             union bpf_attr __user *uattr)
{
        struct bpf_prog *prog;
        int ret = -ENOTSUPP;

        if (CHECK_ATTR(BPF_PROG_TEST_RUN))
                return -EINVAL;

        prog = bpf_prog_get(attr->test.prog_fd);
        if (IS_ERR(prog))
                return PTR_ERR(prog);

        if (prog->aux->ops->test_run)
                ret = prog->aux->ops->test_run(prog, attr, uattr);

        bpf_prog_put(prog);
        return ret;
}
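
/* Note: only program types whose verifier ops provide ->test_run (at
 * this point, a few networking types such as skb- and XDP-based
 * programs) support BPF_PROG_TEST_RUN; everything else gets -ENOTSUPP.
 */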

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
        union bpf_attr attr = {};
        int err;

        if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
                return -EPERM;

        if (!access_ok(VERIFY_READ, uattr, 1))
                return -EFAULT;

        if (size > PAGE_SIZE)   /* silly large */
                return -E2BIG;

        /* If we're handed a bigger struct than we know of,
         * ensure all the unknown bits are 0 - i.e. new
         * user-space does not rely on any kernel feature
         * extensions we don't know about yet.
         */
        if (size > sizeof(attr)) {
                unsigned char __user *addr;
                unsigned char __user *end;
                unsigned char val;

                addr = (void __user *)uattr + sizeof(attr);
                end  = (void __user *)uattr + size;

                for (; addr < end; addr++) {
                        err = get_user(val, addr);
                        if (err)
                                return err;
                        if (val)
                                return -E2BIG;
                }
                size = sizeof(attr);
        }

        /* copy attributes from user space, may be less than sizeof(bpf_attr) */
        if (copy_from_user(&attr, uattr, size) != 0)
                return -EFAULT;

        switch (cmd) {
        case BPF_MAP_CREATE:
                err = map_create(&attr);
                break;
        case BPF_MAP_LOOKUP_ELEM:
                err = map_lookup_elem(&attr);
                break;
        case BPF_MAP_UPDATE_ELEM:
                err = map_update_elem(&attr);
                break;
        case BPF_MAP_DELETE_ELEM:
                err = map_delete_elem(&attr);
                break;
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);
                break;
        case BPF_PROG_LOAD:
                err = bpf_prog_load(&attr);
                break;
        case BPF_OBJ_PIN:
                err = bpf_obj_pin(&attr);
                break;
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
#ifdef CONFIG_CGROUP_BPF
        case BPF_PROG_ATTACH:
                err = bpf_prog_attach(&attr);
                break;
        case BPF_PROG_DETACH:
                err = bpf_prog_detach(&attr);
                break;
#endif
        case BPF_PROG_TEST_RUN:
                err = bpf_prog_test_run(&attr, uattr);
                break;
        default:
                err = -EINVAL;
                break;
        }

        return err;
}