linux/kernel/bpf/helpers.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3 */
   4#include <linux/bpf.h>
   5#include <linux/rcupdate.h>
   6#include <linux/random.h>
   7#include <linux/smp.h>
   8#include <linux/topology.h>
   9#include <linux/ktime.h>
  10#include <linux/sched.h>
  11#include <linux/uidgid.h>
  12#include <linux/filter.h>
  13#include <linux/ctype.h>
  14#include <linux/jiffies.h>
  15#include <linux/pid_namespace.h>
  16#include <linux/proc_ns.h>
  17#include <linux/security.h>
  18
  19#include "../../lib/kstrtox.h"
  20
   21/* If a kernel subsystem allows eBPF programs to call this function, it should
   22 * return bpf_map_lookup_elem_proto from its verifier_ops->get_func_proto()
   23 * callback, so that the verifier can properly check the arguments.
   24 *
   25 * Different map implementations rely on RCU in their lookup/update/delete
   26 * map methods, therefore eBPF programs must run under the RCU read lock
   27 * whenever they are allowed to access maps; check rcu_read_lock_held() in
   28 * all three functions.
   29 */
  30BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
  31{
  32        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  33        return (unsigned long) map->ops->map_lookup_elem(map, key);
  34}
  35
  36const struct bpf_func_proto bpf_map_lookup_elem_proto = {
  37        .func           = bpf_map_lookup_elem,
  38        .gpl_only       = false,
  39        .pkt_access     = true,
  40        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
  41        .arg1_type      = ARG_CONST_MAP_PTR,
  42        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  43};
  44
  45BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
  46           void *, value, u64, flags)
  47{
  48        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  49        return map->ops->map_update_elem(map, key, value, flags);
  50}
  51
  52const struct bpf_func_proto bpf_map_update_elem_proto = {
  53        .func           = bpf_map_update_elem,
  54        .gpl_only       = false,
  55        .pkt_access     = true,
  56        .ret_type       = RET_INTEGER,
  57        .arg1_type      = ARG_CONST_MAP_PTR,
  58        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  59        .arg3_type      = ARG_PTR_TO_MAP_VALUE,
  60        .arg4_type      = ARG_ANYTHING,
  61};
  62
  63BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
  64{
  65        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
  66        return map->ops->map_delete_elem(map, key);
  67}
  68
  69const struct bpf_func_proto bpf_map_delete_elem_proto = {
  70        .func           = bpf_map_delete_elem,
  71        .gpl_only       = false,
  72        .pkt_access     = true,
  73        .ret_type       = RET_INTEGER,
  74        .arg1_type      = ARG_CONST_MAP_PTR,
  75        .arg2_type      = ARG_PTR_TO_MAP_KEY,
  76};
  77
  78BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
  79{
  80        return map->ops->map_push_elem(map, value, flags);
  81}
  82
  83const struct bpf_func_proto bpf_map_push_elem_proto = {
  84        .func           = bpf_map_push_elem,
  85        .gpl_only       = false,
  86        .pkt_access     = true,
  87        .ret_type       = RET_INTEGER,
  88        .arg1_type      = ARG_CONST_MAP_PTR,
  89        .arg2_type      = ARG_PTR_TO_MAP_VALUE,
  90        .arg3_type      = ARG_ANYTHING,
  91};
  92
  93BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
  94{
  95        return map->ops->map_pop_elem(map, value);
  96}
  97
  98const struct bpf_func_proto bpf_map_pop_elem_proto = {
  99        .func           = bpf_map_pop_elem,
 100        .gpl_only       = false,
 101        .ret_type       = RET_INTEGER,
 102        .arg1_type      = ARG_CONST_MAP_PTR,
 103        .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
 104};
 105
 106BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
 107{
 108        return map->ops->map_peek_elem(map, value);
 109}
 110
 111const struct bpf_func_proto bpf_map_peek_elem_proto = {
 112        .func           = bpf_map_peek_elem,
 113        .gpl_only       = false,
 114        .ret_type       = RET_INTEGER,
 115        .arg1_type      = ARG_CONST_MAP_PTR,
 116        .arg2_type      = ARG_PTR_TO_UNINIT_MAP_VALUE,
 117};
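
/* Usage sketch (illustrative, from the BPF program side): the push/pop/peek
 * helpers above pair with BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK maps,
 * which take no key. The map name "q" is a placeholder.
 *
 *	__u32 v = 42;
 *
 *	bpf_map_push_elem(&q, &v, 0);	enqueue (flags: 0 or BPF_EXIST)
 *	bpf_map_peek_elem(&q, &v);	read the head/top without removing it
 *	bpf_map_pop_elem(&q, &v);	remove the head/top and copy it out
 */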
 118
 119const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 120        .func           = bpf_user_rnd_u32,
 121        .gpl_only       = false,
 122        .ret_type       = RET_INTEGER,
 123};
 124
 125BPF_CALL_0(bpf_get_smp_processor_id)
 126{
 127        return smp_processor_id();
 128}
 129
 130const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
 131        .func           = bpf_get_smp_processor_id,
 132        .gpl_only       = false,
 133        .ret_type       = RET_INTEGER,
 134};
 135
 136BPF_CALL_0(bpf_get_numa_node_id)
 137{
 138        return numa_node_id();
 139}
 140
 141const struct bpf_func_proto bpf_get_numa_node_id_proto = {
 142        .func           = bpf_get_numa_node_id,
 143        .gpl_only       = false,
 144        .ret_type       = RET_INTEGER,
 145};
 146
 147BPF_CALL_0(bpf_ktime_get_ns)
 148{
 149        /* NMI safe access to clock monotonic */
 150        return ktime_get_mono_fast_ns();
 151}
 152
 153const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 154        .func           = bpf_ktime_get_ns,
 155        .gpl_only       = false,
 156        .ret_type       = RET_INTEGER,
 157};
 158
 159BPF_CALL_0(bpf_ktime_get_boot_ns)
 160{
 161        /* NMI safe access to clock boottime */
 162        return ktime_get_boot_fast_ns();
 163}
 164
 165const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 166        .func           = bpf_ktime_get_boot_ns,
 167        .gpl_only       = false,
 168        .ret_type       = RET_INTEGER,
 169};
 170
 171BPF_CALL_0(bpf_ktime_get_coarse_ns)
 172{
 173        return ktime_get_coarse_ns();
 174}
 175
 176const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
 177        .func           = bpf_ktime_get_coarse_ns,
 178        .gpl_only       = false,
 179        .ret_type       = RET_INTEGER,
 180};
 181
 182BPF_CALL_0(bpf_get_current_pid_tgid)
 183{
 184        struct task_struct *task = current;
 185
 186        if (unlikely(!task))
 187                return -EINVAL;
 188
 189        return (u64) task->tgid << 32 | task->pid;
 190}
 191
 192const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
 193        .func           = bpf_get_current_pid_tgid,
 194        .gpl_only       = false,
 195        .ret_type       = RET_INTEGER,
 196};
 197
 198BPF_CALL_0(bpf_get_current_uid_gid)
 199{
 200        struct task_struct *task = current;
 201        kuid_t uid;
 202        kgid_t gid;
 203
 204        if (unlikely(!task))
 205                return -EINVAL;
 206
 207        current_uid_gid(&uid, &gid);
 208        return (u64) from_kgid(&init_user_ns, gid) << 32 |
 209                     from_kuid(&init_user_ns, uid);
 210}
 211
 212const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
 213        .func           = bpf_get_current_uid_gid,
 214        .gpl_only       = false,
 215        .ret_type       = RET_INTEGER,
 216};
 217
 218BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
 219{
 220        struct task_struct *task = current;
 221
 222        if (unlikely(!task))
 223                goto err_clear;
 224
 225        strncpy(buf, task->comm, size);
 226
 227        /* Verifier guarantees that size > 0. For task->comm exceeding
 228         * size, guarantee that buf is %NUL-terminated. Unconditionally
 229         * done here to save the size test.
 230         */
 231        buf[size - 1] = 0;
 232        return 0;
 233err_clear:
 234        memset(buf, 0, size);
 235        return -EINVAL;
 236}
 237
 238const struct bpf_func_proto bpf_get_current_comm_proto = {
 239        .func           = bpf_get_current_comm,
 240        .gpl_only       = false,
 241        .ret_type       = RET_INTEGER,
 242        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
 243        .arg2_type      = ARG_CONST_SIZE,
 244};
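
/* Usage sketch (illustrative, from the BPF program side): the destination
 * buffer must be a fixed-size buffer so the verifier can check the constant
 * size argument; 16 matches TASK_COMM_LEN.
 *
 *	char comm[16];
 *
 *	bpf_get_current_comm(comm, sizeof(comm));
 */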
 245
 246#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
 247
 248static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
 249{
 250        arch_spinlock_t *l = (void *)lock;
 251        union {
 252                __u32 val;
 253                arch_spinlock_t lock;
 254        } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
 255
 256        compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
 257        BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
 258        BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
 259        arch_spin_lock(l);
 260}
 261
 262static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
 263{
 264        arch_spinlock_t *l = (void *)lock;
 265
 266        arch_spin_unlock(l);
 267}
 268
 269#else
 270
 271static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
 272{
 273        atomic_t *l = (void *)lock;
 274
 275        BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
 276        do {
 277                atomic_cond_read_relaxed(l, !VAL);
 278        } while (atomic_xchg(l, 1));
 279}
 280
 281static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
 282{
 283        atomic_t *l = (void *)lock;
 284
 285        atomic_set_release(l, 0);
 286}
 287
 288#endif
 289
 290static DEFINE_PER_CPU(unsigned long, irqsave_flags);
 291
 292static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
 293{
 294        unsigned long flags;
 295
 296        local_irq_save(flags);
 297        __bpf_spin_lock(lock);
 298        __this_cpu_write(irqsave_flags, flags);
 299}
 300
 301notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
 302{
 303        __bpf_spin_lock_irqsave(lock);
 304        return 0;
 305}
 306
 307const struct bpf_func_proto bpf_spin_lock_proto = {
 308        .func           = bpf_spin_lock,
 309        .gpl_only       = false,
 310        .ret_type       = RET_VOID,
 311        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 312};
 313
 314static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
 315{
 316        unsigned long flags;
 317
 318        flags = __this_cpu_read(irqsave_flags);
 319        __bpf_spin_unlock(lock);
 320        local_irq_restore(flags);
 321}
 322
 323notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
 324{
 325        __bpf_spin_unlock_irqrestore(lock);
 326        return 0;
 327}
 328
 329const struct bpf_func_proto bpf_spin_unlock_proto = {
 330        .func           = bpf_spin_unlock,
 331        .gpl_only       = false,
 332        .ret_type       = RET_VOID,
 333        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 334};
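
/* Usage sketch (illustrative, from the BPF program side): bpf_spin_lock()
 * protects fields that share a map value with a struct bpf_spin_lock member;
 * the struct, map and key names below are placeholders.
 *
 *	struct val {
 *		struct bpf_spin_lock lock;
 *		long cnt;
 *	};
 *
 *	struct val *v = bpf_map_lookup_elem(&vals, &key);
 *	if (v) {
 *		bpf_spin_lock(&v->lock);
 *		v->cnt++;
 *		bpf_spin_unlock(&v->lock);
 *	}
 */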
 335
 336void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 337                           bool lock_src)
 338{
 339        struct bpf_spin_lock *lock;
 340
 341        if (lock_src)
 342                lock = src + map->spin_lock_off;
 343        else
 344                lock = dst + map->spin_lock_off;
 345        preempt_disable();
 346        __bpf_spin_lock_irqsave(lock);
 347        copy_map_value(map, dst, src);
 348        __bpf_spin_unlock_irqrestore(lock);
 349        preempt_enable();
 350}
 351
 352BPF_CALL_0(bpf_jiffies64)
 353{
 354        return get_jiffies_64();
 355}
 356
 357const struct bpf_func_proto bpf_jiffies64_proto = {
 358        .func           = bpf_jiffies64,
 359        .gpl_only       = false,
 360        .ret_type       = RET_INTEGER,
 361};
 362
 363#ifdef CONFIG_CGROUPS
 364BPF_CALL_0(bpf_get_current_cgroup_id)
 365{
 366        struct cgroup *cgrp;
 367        u64 cgrp_id;
 368
 369        rcu_read_lock();
 370        cgrp = task_dfl_cgroup(current);
 371        cgrp_id = cgroup_id(cgrp);
 372        rcu_read_unlock();
 373
 374        return cgrp_id;
 375}
 376
 377const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 378        .func           = bpf_get_current_cgroup_id,
 379        .gpl_only       = false,
 380        .ret_type       = RET_INTEGER,
 381};
 382
 383BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
 384{
 385        struct cgroup *cgrp;
 386        struct cgroup *ancestor;
 387        u64 cgrp_id;
 388
 389        rcu_read_lock();
 390        cgrp = task_dfl_cgroup(current);
 391        ancestor = cgroup_ancestor(cgrp, ancestor_level);
 392        cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
 393        rcu_read_unlock();
 394
 395        return cgrp_id;
 396}
 397
 398const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 399        .func           = bpf_get_current_ancestor_cgroup_id,
 400        .gpl_only       = false,
 401        .ret_type       = RET_INTEGER,
 402        .arg1_type      = ARG_ANYTHING,
 403};
 404
 405#ifdef CONFIG_CGROUP_BPF
 406
 407BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 408{
  409        /* The flags argument is not used yet,
  410         * but provides the ability to extend the API.
  411         * The verifier checks that its value is correct.
  412         */
 413        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
 414        struct bpf_cgroup_storage *storage;
 415        struct bpf_cg_run_ctx *ctx;
 416        void *ptr;
 417
 418        /* get current cgroup storage from BPF run context */
 419        ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
 420        storage = ctx->prog_item->cgroup_storage[stype];
 421
 422        if (stype == BPF_CGROUP_STORAGE_SHARED)
 423                ptr = &READ_ONCE(storage->buf)->data[0];
 424        else
 425                ptr = this_cpu_ptr(storage->percpu_buf);
 426
 427        return (unsigned long)ptr;
 428}
 429
 430const struct bpf_func_proto bpf_get_local_storage_proto = {
 431        .func           = bpf_get_local_storage,
 432        .gpl_only       = false,
 433        .ret_type       = RET_PTR_TO_MAP_VALUE,
 434        .arg1_type      = ARG_CONST_MAP_PTR,
 435        .arg2_type      = ARG_ANYTHING,
 436};
 437#endif
 438
 439#define BPF_STRTOX_BASE_MASK 0x1F
 440
 441static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
 442                          unsigned long long *res, bool *is_negative)
 443{
 444        unsigned int base = flags & BPF_STRTOX_BASE_MASK;
 445        const char *cur_buf = buf;
 446        size_t cur_len = buf_len;
 447        unsigned int consumed;
 448        size_t val_len;
 449        char str[64];
 450
 451        if (!buf || !buf_len || !res || !is_negative)
 452                return -EINVAL;
 453
 454        if (base != 0 && base != 8 && base != 10 && base != 16)
 455                return -EINVAL;
 456
 457        if (flags & ~BPF_STRTOX_BASE_MASK)
 458                return -EINVAL;
 459
 460        while (cur_buf < buf + buf_len && isspace(*cur_buf))
 461                ++cur_buf;
 462
 463        *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
 464        if (*is_negative)
 465                ++cur_buf;
 466
 467        consumed = cur_buf - buf;
 468        cur_len -= consumed;
 469        if (!cur_len)
 470                return -EINVAL;
 471
 472        cur_len = min(cur_len, sizeof(str) - 1);
 473        memcpy(str, cur_buf, cur_len);
 474        str[cur_len] = '\0';
 475        cur_buf = str;
 476
 477        cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
 478        val_len = _parse_integer(cur_buf, base, res);
 479
 480        if (val_len & KSTRTOX_OVERFLOW)
 481                return -ERANGE;
 482
 483        if (val_len == 0)
 484                return -EINVAL;
 485
 486        cur_buf += val_len;
 487        consumed += cur_buf - str;
 488
 489        return consumed;
 490}
 491
 492static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
 493                         long long *res)
 494{
 495        unsigned long long _res;
 496        bool is_negative;
 497        int err;
 498
 499        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
 500        if (err < 0)
 501                return err;
 502        if (is_negative) {
 503                if ((long long)-_res > 0)
 504                        return -ERANGE;
 505                *res = -_res;
 506        } else {
 507                if ((long long)_res < 0)
 508                        return -ERANGE;
 509                *res = _res;
 510        }
 511        return err;
 512}
 513
 514BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
 515           long *, res)
 516{
 517        long long _res;
 518        int err;
 519
 520        err = __bpf_strtoll(buf, buf_len, flags, &_res);
 521        if (err < 0)
 522                return err;
 523        if (_res != (long)_res)
 524                return -ERANGE;
 525        *res = _res;
 526        return err;
 527}
 528
 529const struct bpf_func_proto bpf_strtol_proto = {
 530        .func           = bpf_strtol,
 531        .gpl_only       = false,
 532        .ret_type       = RET_INTEGER,
 533        .arg1_type      = ARG_PTR_TO_MEM,
 534        .arg2_type      = ARG_CONST_SIZE,
 535        .arg3_type      = ARG_ANYTHING,
 536        .arg4_type      = ARG_PTR_TO_LONG,
 537};
 538
 539BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
 540           unsigned long *, res)
 541{
 542        unsigned long long _res;
 543        bool is_negative;
 544        int err;
 545
 546        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
 547        if (err < 0)
 548                return err;
 549        if (is_negative)
 550                return -EINVAL;
 551        if (_res != (unsigned long)_res)
 552                return -ERANGE;
 553        *res = _res;
 554        return err;
 555}
 556
 557const struct bpf_func_proto bpf_strtoul_proto = {
 558        .func           = bpf_strtoul,
 559        .gpl_only       = false,
 560        .ret_type       = RET_INTEGER,
 561        .arg1_type      = ARG_PTR_TO_MEM,
 562        .arg2_type      = ARG_CONST_SIZE,
 563        .arg3_type      = ARG_ANYTHING,
 564        .arg4_type      = ARG_PTR_TO_LONG,
 565};
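
/* Usage sketch (illustrative, from the BPF program side): the low bits of
 * flags carry the numeric base (0 means auto-detect, see BPF_STRTOX_BASE_MASK
 * above); on success the return value is the number of characters consumed.
 *
 *	long val;
 *
 *	if (bpf_strtol(buf, buf_len, 0, &val) < 0)
 *		return 0;
 */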
 566#endif
 567
 568BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
 569           struct bpf_pidns_info *, nsdata, u32, size)
 570{
 571        struct task_struct *task = current;
 572        struct pid_namespace *pidns;
 573        int err = -EINVAL;
 574
 575        if (unlikely(size != sizeof(struct bpf_pidns_info)))
 576                goto clear;
 577
 578        if (unlikely((u64)(dev_t)dev != dev))
 579                goto clear;
 580
 581        if (unlikely(!task))
 582                goto clear;
 583
 584        pidns = task_active_pid_ns(task);
 585        if (unlikely(!pidns)) {
 586                err = -ENOENT;
 587                goto clear;
 588        }
 589
 590        if (!ns_match(&pidns->ns, (dev_t)dev, ino))
 591                goto clear;
 592
 593        nsdata->pid = task_pid_nr_ns(task, pidns);
 594        nsdata->tgid = task_tgid_nr_ns(task, pidns);
 595        return 0;
 596clear:
 597        memset((void *)nsdata, 0, (size_t) size);
 598        return err;
 599}
 600
 601const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
 602        .func           = bpf_get_ns_current_pid_tgid,
 603        .gpl_only       = false,
 604        .ret_type       = RET_INTEGER,
 605        .arg1_type      = ARG_ANYTHING,
 606        .arg2_type      = ARG_ANYTHING,
 607        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
 608        .arg4_type      = ARG_CONST_SIZE,
 609};
 610
 611static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
 612        .func           = bpf_get_raw_cpu_id,
 613        .gpl_only       = false,
 614        .ret_type       = RET_INTEGER,
 615};
 616
 617BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
 618           u64, flags, void *, data, u64, size)
 619{
 620        if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 621                return -EINVAL;
 622
 623        return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
 624}
 625
 626const struct bpf_func_proto bpf_event_output_data_proto =  {
 627        .func           = bpf_event_output_data,
 628        .gpl_only       = true,
 629        .ret_type       = RET_INTEGER,
 630        .arg1_type      = ARG_PTR_TO_CTX,
 631        .arg2_type      = ARG_CONST_MAP_PTR,
 632        .arg3_type      = ARG_ANYTHING,
 633        .arg4_type      = ARG_PTR_TO_MEM,
 634        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 635};
 636
 637BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
 638           const void __user *, user_ptr)
 639{
 640        int ret = copy_from_user(dst, user_ptr, size);
 641
 642        if (unlikely(ret)) {
 643                memset(dst, 0, size);
 644                ret = -EFAULT;
 645        }
 646
 647        return ret;
 648}
 649
 650const struct bpf_func_proto bpf_copy_from_user_proto = {
 651        .func           = bpf_copy_from_user,
 652        .gpl_only       = false,
 653        .ret_type       = RET_INTEGER,
 654        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
 655        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
 656        .arg3_type      = ARG_ANYTHING,
 657};
 658
 659BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 660{
 661        if (cpu >= nr_cpu_ids)
 662                return (unsigned long)NULL;
 663
 664        return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
 665}
 666
 667const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
 668        .func           = bpf_per_cpu_ptr,
 669        .gpl_only       = false,
 670        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
 671        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
 672        .arg2_type      = ARG_ANYTHING,
 673};
 674
 675BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
 676{
 677        return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
 678}
 679
 680const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
 681        .func           = bpf_this_cpu_ptr,
 682        .gpl_only       = false,
 683        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID,
 684        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
 685};
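
/* Usage sketch (illustrative, from the BPF program side): per-CPU kernel
 * variables are referenced through __ksym externs; "some_percpu_var" is a
 * placeholder for a real per-CPU symbol with BTF.
 *
 *	extern const int some_percpu_var __ksym;
 *
 *	const int *p = bpf_per_cpu_ptr(&some_percpu_var, cpu);
 *	if (p)
 *		val = *p;
 *	p = bpf_this_cpu_ptr(&some_percpu_var);
 */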
 686
 687static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 688                size_t bufsz)
 689{
 690        void __user *user_ptr = (__force void __user *)unsafe_ptr;
 691
 692        buf[0] = 0;
 693
 694        switch (fmt_ptype) {
 695        case 's':
 696#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 697                if ((unsigned long)unsafe_ptr < TASK_SIZE)
 698                        return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 699                fallthrough;
 700#endif
 701        case 'k':
 702                return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
 703        case 'u':
 704                return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 705        }
 706
 707        return -EINVAL;
 708}
 709
  710/* Per-cpu temp buffers used by printf-like helpers to store the binary
  711 * representation of the bprintf arguments.
  712 */
 713#define MAX_BPRINTF_BUF_LEN     512
 714
 715/* Support executing three nested bprintf helper calls on a given CPU */
 716#define MAX_BPRINTF_NEST_LEVEL  3
 717struct bpf_bprintf_buffers {
 718        char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
 719};
 720static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 721static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
 722
 723static int try_get_fmt_tmp_buf(char **tmp_buf)
 724{
 725        struct bpf_bprintf_buffers *bufs;
 726        int nest_level;
 727
 728        preempt_disable();
 729        nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
 730        if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
 731                this_cpu_dec(bpf_bprintf_nest_level);
 732                preempt_enable();
 733                return -EBUSY;
 734        }
 735        bufs = this_cpu_ptr(&bpf_bprintf_bufs);
 736        *tmp_buf = bufs->tmp_bufs[nest_level - 1];
 737
 738        return 0;
 739}
 740
 741void bpf_bprintf_cleanup(void)
 742{
 743        if (this_cpu_read(bpf_bprintf_nest_level)) {
 744                this_cpu_dec(bpf_bprintf_nest_level);
 745                preempt_enable();
 746        }
 747}
 748
 749/*
 750 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 751 *
 752 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 753 *
 754 * This can be used in two ways:
 755 * - Format string verification only: when bin_args is NULL
 756 * - Arguments preparation: in addition to the above verification, it writes in
 757 *   bin_args a binary representation of arguments usable by bstr_printf where
 758 *   pointers from BPF have been sanitized.
 759 *
 760 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 761 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 762 */
 763int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 764                        u32 **bin_args, u32 num_args)
 765{
 766        char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
 767        size_t sizeof_cur_arg, sizeof_cur_ip;
 768        int err, i, num_spec = 0;
 769        u64 cur_arg;
 770        char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
 771
 772        fmt_end = strnchr(fmt, fmt_size, 0);
 773        if (!fmt_end)
 774                return -EINVAL;
 775        fmt_size = fmt_end - fmt;
 776
 777        if (bin_args) {
 778                if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
 779                        return -EBUSY;
 780
 781                tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
 782                *bin_args = (u32 *)tmp_buf;
 783        }
 784
 785        for (i = 0; i < fmt_size; i++) {
 786                if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
 787                        err = -EINVAL;
 788                        goto out;
 789                }
 790
 791                if (fmt[i] != '%')
 792                        continue;
 793
 794                if (fmt[i + 1] == '%') {
 795                        i++;
 796                        continue;
 797                }
 798
 799                if (num_spec >= num_args) {
 800                        err = -EINVAL;
 801                        goto out;
 802                }
 803
 804                /* The string is zero-terminated so if fmt[i] != 0, we can
  805                 * always access fmt[i + 1]; in the worst case it will be a 0
 806                 */
 807                i++;
 808
 809                /* skip optional "[0 +-][num]" width formatting field */
 810                while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
 811                       fmt[i] == ' ')
 812                        i++;
 813                if (fmt[i] >= '1' && fmt[i] <= '9') {
 814                        i++;
 815                        while (fmt[i] >= '0' && fmt[i] <= '9')
 816                                i++;
 817                }
 818
 819                if (fmt[i] == 'p') {
 820                        sizeof_cur_arg = sizeof(long);
 821
 822                        if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
 823                            fmt[i + 2] == 's') {
 824                                fmt_ptype = fmt[i + 1];
 825                                i += 2;
 826                                goto fmt_str;
 827                        }
 828
 829                        if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
 830                            ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
 831                            fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
 832                            fmt[i + 1] == 'S') {
 833                                /* just kernel pointers */
 834                                if (tmp_buf)
 835                                        cur_arg = raw_args[num_spec];
 836                                i++;
 837                                goto nocopy_fmt;
 838                        }
 839
 840                        if (fmt[i + 1] == 'B') {
 841                                if (tmp_buf)  {
 842                                        err = snprintf(tmp_buf,
 843                                                       (tmp_buf_end - tmp_buf),
 844                                                       "%pB",
 845                                                       (void *)(long)raw_args[num_spec]);
 846                                        tmp_buf += (err + 1);
 847                                }
 848
 849                                i++;
 850                                num_spec++;
 851                                continue;
 852                        }
 853
 854                        /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
 855                        if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
 856                            (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
 857                                err = -EINVAL;
 858                                goto out;
 859                        }
 860
 861                        i += 2;
 862                        if (!tmp_buf)
 863                                goto nocopy_fmt;
 864
 865                        sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
 866                        if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
 867                                err = -ENOSPC;
 868                                goto out;
 869                        }
 870
 871                        unsafe_ptr = (char *)(long)raw_args[num_spec];
 872                        err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
 873                                                       sizeof_cur_ip);
 874                        if (err < 0)
 875                                memset(cur_ip, 0, sizeof_cur_ip);
 876
 877                        /* hack: bstr_printf expects IP addresses to be
  878                         * pre-formatted as strings; ironically, the easiest way
 879                         * to do that is to call snprintf.
 880                         */
 881                        ip_spec[2] = fmt[i - 1];
 882                        ip_spec[3] = fmt[i];
 883                        err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
 884                                       ip_spec, &cur_ip);
 885
 886                        tmp_buf += err + 1;
 887                        num_spec++;
 888
 889                        continue;
 890                } else if (fmt[i] == 's') {
 891                        fmt_ptype = fmt[i];
 892fmt_str:
 893                        if (fmt[i + 1] != 0 &&
 894                            !isspace(fmt[i + 1]) &&
 895                            !ispunct(fmt[i + 1])) {
 896                                err = -EINVAL;
 897                                goto out;
 898                        }
 899
 900                        if (!tmp_buf)
 901                                goto nocopy_fmt;
 902
 903                        if (tmp_buf_end == tmp_buf) {
 904                                err = -ENOSPC;
 905                                goto out;
 906                        }
 907
 908                        unsafe_ptr = (char *)(long)raw_args[num_spec];
 909                        err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
 910                                                    fmt_ptype,
 911                                                    tmp_buf_end - tmp_buf);
 912                        if (err < 0) {
 913                                tmp_buf[0] = '\0';
 914                                err = 1;
 915                        }
 916
 917                        tmp_buf += err;
 918                        num_spec++;
 919
 920                        continue;
 921                } else if (fmt[i] == 'c') {
 922                        if (!tmp_buf)
 923                                goto nocopy_fmt;
 924
 925                        if (tmp_buf_end == tmp_buf) {
 926                                err = -ENOSPC;
 927                                goto out;
 928                        }
 929
 930                        *tmp_buf = raw_args[num_spec];
 931                        tmp_buf++;
 932                        num_spec++;
 933
 934                        continue;
 935                }
 936
 937                sizeof_cur_arg = sizeof(int);
 938
 939                if (fmt[i] == 'l') {
 940                        sizeof_cur_arg = sizeof(long);
 941                        i++;
 942                }
 943                if (fmt[i] == 'l') {
 944                        sizeof_cur_arg = sizeof(long long);
 945                        i++;
 946                }
 947
 948                if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
 949                    fmt[i] != 'x' && fmt[i] != 'X') {
 950                        err = -EINVAL;
 951                        goto out;
 952                }
 953
 954                if (tmp_buf)
 955                        cur_arg = raw_args[num_spec];
 956nocopy_fmt:
 957                if (tmp_buf) {
 958                        tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
 959                        if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
 960                                err = -ENOSPC;
 961                                goto out;
 962                        }
 963
 964                        if (sizeof_cur_arg == 8) {
 965                                *(u32 *)tmp_buf = *(u32 *)&cur_arg;
 966                                *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
 967                        } else {
 968                                *(u32 *)tmp_buf = (u32)(long)cur_arg;
 969                        }
 970                        tmp_buf += sizeof_cur_arg;
 971                }
 972                num_spec++;
 973        }
 974
 975        err = 0;
 976out:
 977        if (err)
 978                bpf_bprintf_cleanup();
 979        return err;
 980}
 981
 982#define MAX_SNPRINTF_VARARGS            12
 983
 984BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
 985           const void *, data, u32, data_len)
 986{
 987        int err, num_args;
 988        u32 *bin_args;
 989
 990        if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
 991            (data_len && !data))
 992                return -EINVAL;
 993        num_args = data_len / 8;
 994
 995        /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
 996         * can safely give an unbounded size.
 997         */
 998        err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
 999        if (err < 0)
1000                return err;
1001
1002        err = bstr_printf(str, str_size, fmt, bin_args);
1003
1004        bpf_bprintf_cleanup();
1005
1006        return err + 1;
1007}
1008
1009const struct bpf_func_proto bpf_snprintf_proto = {
1010        .func           = bpf_snprintf,
1011        .gpl_only       = true,
1012        .ret_type       = RET_INTEGER,
1013        .arg1_type      = ARG_PTR_TO_MEM_OR_NULL,
1014        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
1015        .arg3_type      = ARG_PTR_TO_CONST_STR,
1016        .arg4_type      = ARG_PTR_TO_MEM_OR_NULL,
1017        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
1018};
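
/* Usage sketch (illustrative, from the BPF program side): the variadic
 * arguments are packed into a u64 array and passed with their total size;
 * the format string must live in read-only program data so the verifier can
 * validate it (ARG_PTR_TO_CONST_STR). pid and comm are placeholders.
 *
 *	static const char fmt[] = "pid=%d comm=%s";
 *	__u64 args[] = { pid, (__u64)(long)comm };
 *	char out[64];
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 */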
1019
1020/* BPF map elements can contain 'struct bpf_timer'.
1021 * Such map owns all of its BPF timers.
1022 * 'struct bpf_timer' is allocated as part of map element allocation
1023 * and it's zero initialized.
1024 * That space is used to keep 'struct bpf_timer_kern'.
1025 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1026 * remembers 'struct bpf_map *' pointer it's part of.
 1027 * bpf_timer_set_callback() increments the prog refcnt and assigns the bpf callback_fn.
 1028 * bpf_timer_start() arms the timer.
 1029 * If the user space reference to a map goes to zero at this point,
 1030 * the ops->map_release_uref callback is responsible for cancelling the timers,
 1031 * freeing their memory, and decrementing the progs' refcnts.
 1032 * bpf_timer_cancel() cancels the timer and decrements the prog's refcnt.
 1033 * Inner maps can contain bpf timers as well. ops->map_release_uref
 1034 * frees the timers when an inner map is replaced or deleted by user space.
1035 */
1036struct bpf_hrtimer {
1037        struct hrtimer timer;
1038        struct bpf_map *map;
1039        struct bpf_prog *prog;
1040        void __rcu *callback_fn;
1041        void *value;
1042};
1043
1044/* the actual struct hidden inside uapi struct bpf_timer */
1045struct bpf_timer_kern {
1046        struct bpf_hrtimer *timer;
1047        /* bpf_spin_lock is used here instead of spinlock_t to make
 1048         * sure that it always fits into the space reserved by struct bpf_timer
1049         * regardless of LOCKDEP and spinlock debug flags.
1050         */
1051        struct bpf_spin_lock lock;
1052} __attribute__((aligned(8)));
1053
1054static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1055
1056static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1057{
1058        struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1059        struct bpf_map *map = t->map;
1060        void *value = t->value;
1061        void *callback_fn;
1062        void *key;
1063        u32 idx;
1064
1065        callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
1066        if (!callback_fn)
1067                goto out;
1068
1069        /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1070         * cannot be preempted by another bpf_timer_cb() on the same cpu.
1071         * Remember the timer this callback is servicing to prevent
1072         * deadlock if callback_fn() calls bpf_timer_cancel() or
1073         * bpf_map_delete_elem() on the same timer.
1074         */
1075        this_cpu_write(hrtimer_running, t);
1076        if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1077                struct bpf_array *array = container_of(map, struct bpf_array, map);
1078
1079                /* compute the key */
1080                idx = ((char *)value - array->value) / array->elem_size;
1081                key = &idx;
1082        } else { /* hash or lru */
1083                key = value - round_up(map->key_size, 8);
1084        }
1085
1086        BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
1087                                   (u64)(long)value, 0, 0);
1088        /* The verifier checked that return value is zero. */
1089
1090        this_cpu_write(hrtimer_running, NULL);
1091out:
1092        return HRTIMER_NORESTART;
1093}
1094
1095BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
1096           u64, flags)
1097{
1098        clockid_t clockid = flags & (MAX_CLOCKS - 1);
1099        struct bpf_hrtimer *t;
1100        int ret = 0;
1101
1102        BUILD_BUG_ON(MAX_CLOCKS != 16);
1103        BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
1104        BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
1105
1106        if (in_nmi())
1107                return -EOPNOTSUPP;
1108
1109        if (flags >= MAX_CLOCKS ||
1110            /* similar to timerfd except _ALARM variants are not supported */
1111            (clockid != CLOCK_MONOTONIC &&
1112             clockid != CLOCK_REALTIME &&
1113             clockid != CLOCK_BOOTTIME))
1114                return -EINVAL;
1115        __bpf_spin_lock_irqsave(&timer->lock);
1116        t = timer->timer;
1117        if (t) {
1118                ret = -EBUSY;
1119                goto out;
1120        }
1121        if (!atomic64_read(&map->usercnt)) {
1122                /* maps with timers must be either held by user space
1123                 * or pinned in bpffs.
1124                 */
1125                ret = -EPERM;
1126                goto out;
1127        }
1128        /* allocate hrtimer via map_kmalloc to use memcg accounting */
1129        t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
1130        if (!t) {
1131                ret = -ENOMEM;
1132                goto out;
1133        }
1134        t->value = (void *)timer - map->timer_off;
1135        t->map = map;
1136        t->prog = NULL;
1137        rcu_assign_pointer(t->callback_fn, NULL);
1138        hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1139        t->timer.function = bpf_timer_cb;
1140        timer->timer = t;
1141out:
1142        __bpf_spin_unlock_irqrestore(&timer->lock);
1143        return ret;
1144}
1145
1146static const struct bpf_func_proto bpf_timer_init_proto = {
1147        .func           = bpf_timer_init,
1148        .gpl_only       = true,
1149        .ret_type       = RET_INTEGER,
1150        .arg1_type      = ARG_PTR_TO_TIMER,
1151        .arg2_type      = ARG_CONST_MAP_PTR,
1152        .arg3_type      = ARG_ANYTHING,
1153};
1154
1155BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
1156           struct bpf_prog_aux *, aux)
1157{
1158        struct bpf_prog *prev, *prog = aux->prog;
1159        struct bpf_hrtimer *t;
1160        int ret = 0;
1161
1162        if (in_nmi())
1163                return -EOPNOTSUPP;
1164        __bpf_spin_lock_irqsave(&timer->lock);
1165        t = timer->timer;
1166        if (!t) {
1167                ret = -EINVAL;
1168                goto out;
1169        }
1170        if (!atomic64_read(&t->map->usercnt)) {
1171                /* maps with timers must be either held by user space
1172                 * or pinned in bpffs. Otherwise timer might still be
1173                 * running even when bpf prog is detached and user space
1174                 * is gone, since map_release_uref won't ever be called.
1175                 */
1176                ret = -EPERM;
1177                goto out;
1178        }
1179        prev = t->prog;
1180        if (prev != prog) {
1181                /* Bump prog refcnt once. Every bpf_timer_set_callback()
1182                 * can pick different callback_fn-s within the same prog.
1183                 */
1184                prog = bpf_prog_inc_not_zero(prog);
1185                if (IS_ERR(prog)) {
1186                        ret = PTR_ERR(prog);
1187                        goto out;
1188                }
1189                if (prev)
1190                        /* Drop prev prog refcnt when swapping with new prog */
1191                        bpf_prog_put(prev);
1192                t->prog = prog;
1193        }
1194        rcu_assign_pointer(t->callback_fn, callback_fn);
1195out:
1196        __bpf_spin_unlock_irqrestore(&timer->lock);
1197        return ret;
1198}
1199
1200static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1201        .func           = bpf_timer_set_callback,
1202        .gpl_only       = true,
1203        .ret_type       = RET_INTEGER,
1204        .arg1_type      = ARG_PTR_TO_TIMER,
1205        .arg2_type      = ARG_PTR_TO_FUNC,
1206};
1207
1208BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
1209{
1210        struct bpf_hrtimer *t;
1211        int ret = 0;
1212
1213        if (in_nmi())
1214                return -EOPNOTSUPP;
1215        if (flags)
1216                return -EINVAL;
1217        __bpf_spin_lock_irqsave(&timer->lock);
1218        t = timer->timer;
1219        if (!t || !t->prog) {
1220                ret = -EINVAL;
1221                goto out;
1222        }
1223        hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
1224out:
1225        __bpf_spin_unlock_irqrestore(&timer->lock);
1226        return ret;
1227}
1228
1229static const struct bpf_func_proto bpf_timer_start_proto = {
1230        .func           = bpf_timer_start,
1231        .gpl_only       = true,
1232        .ret_type       = RET_INTEGER,
1233        .arg1_type      = ARG_PTR_TO_TIMER,
1234        .arg2_type      = ARG_ANYTHING,
1235        .arg3_type      = ARG_ANYTHING,
1236};
1237
1238static void drop_prog_refcnt(struct bpf_hrtimer *t)
1239{
1240        struct bpf_prog *prog = t->prog;
1241
1242        if (prog) {
1243                bpf_prog_put(prog);
1244                t->prog = NULL;
1245                rcu_assign_pointer(t->callback_fn, NULL);
1246        }
1247}
1248
1249BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
1250{
1251        struct bpf_hrtimer *t;
1252        int ret = 0;
1253
1254        if (in_nmi())
1255                return -EOPNOTSUPP;
1256        __bpf_spin_lock_irqsave(&timer->lock);
1257        t = timer->timer;
1258        if (!t) {
1259                ret = -EINVAL;
1260                goto out;
1261        }
1262        if (this_cpu_read(hrtimer_running) == t) {
1263                /* If bpf callback_fn is trying to bpf_timer_cancel()
1264                 * its own timer the hrtimer_cancel() will deadlock
1265                 * since it waits for callback_fn to finish
1266                 */
1267                ret = -EDEADLK;
1268                goto out;
1269        }
1270        drop_prog_refcnt(t);
1271out:
1272        __bpf_spin_unlock_irqrestore(&timer->lock);
1273        /* Cancel the timer and wait for associated callback to finish
1274         * if it was running.
1275         */
1276        ret = ret ?: hrtimer_cancel(&t->timer);
1277        return ret;
1278}
1279
1280static const struct bpf_func_proto bpf_timer_cancel_proto = {
1281        .func           = bpf_timer_cancel,
1282        .gpl_only       = true,
1283        .ret_type       = RET_INTEGER,
1284        .arg1_type      = ARG_PTR_TO_TIMER,
1285};
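
/* Usage sketch (illustrative, from the BPF program side): a typical sequence
 * for a map value "val" embedding a struct bpf_timer field "t"; map and type
 * names are placeholders. The callback takes the map, the key and the value,
 * and must return 0.
 *
 *	static int timer_cb(void *map, __u32 *key, struct map_val *val)
 *	{
 *		return 0;
 *	}
 *
 *	bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	one millisecond; flags must be 0
 *	bpf_timer_cancel(&val->t);
 */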
1286
 1287/* This function is called by map_delete/update_elem for individual elements and
1288 * by ops->map_release_uref when the user space reference to a map reaches zero.
1289 */
1290void bpf_timer_cancel_and_free(void *val)
1291{
1292        struct bpf_timer_kern *timer = val;
1293        struct bpf_hrtimer *t;
1294
1295        /* Performance optimization: read timer->timer without lock first. */
1296        if (!READ_ONCE(timer->timer))
1297                return;
1298
1299        __bpf_spin_lock_irqsave(&timer->lock);
1300        /* re-read it under lock */
1301        t = timer->timer;
1302        if (!t)
1303                goto out;
1304        drop_prog_refcnt(t);
1305        /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1306         * this timer, since it won't be initialized.
1307         */
1308        timer->timer = NULL;
1309out:
1310        __bpf_spin_unlock_irqrestore(&timer->lock);
1311        if (!t)
1312                return;
1313        /* Cancel the timer and wait for callback to complete if it was running.
1314         * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1315         * right after for both preallocated and non-preallocated maps.
1316         * The timer->timer = NULL was already done and no code path can
1317         * see address 't' anymore.
1318         *
1319         * Check that bpf_map_delete/update_elem() wasn't called from timer
1320         * callback_fn. In such case don't call hrtimer_cancel() (since it will
1321         * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1322         * return -1). Though callback_fn is still running on this cpu it's
1323         * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1324         * from 't'. The bpf subprog callback_fn won't be able to access 't',
1325         * since timer->timer = NULL was already done. The timer will be
1326         * effectively cancelled because bpf_timer_cb() will return
1327         * HRTIMER_NORESTART.
1328         */
1329        if (this_cpu_read(hrtimer_running) != t)
1330                hrtimer_cancel(&t->timer);
1331        kfree(t);
1332}
1333
1334const struct bpf_func_proto bpf_get_current_task_proto __weak;
1335const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1336const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1337const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1338const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1339const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1340const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1341
1342const struct bpf_func_proto *
1343bpf_base_func_proto(enum bpf_func_id func_id)
1344{
1345        switch (func_id) {
1346        case BPF_FUNC_map_lookup_elem:
1347                return &bpf_map_lookup_elem_proto;
1348        case BPF_FUNC_map_update_elem:
1349                return &bpf_map_update_elem_proto;
1350        case BPF_FUNC_map_delete_elem:
1351                return &bpf_map_delete_elem_proto;
1352        case BPF_FUNC_map_push_elem:
1353                return &bpf_map_push_elem_proto;
1354        case BPF_FUNC_map_pop_elem:
1355                return &bpf_map_pop_elem_proto;
1356        case BPF_FUNC_map_peek_elem:
1357                return &bpf_map_peek_elem_proto;
1358        case BPF_FUNC_get_prandom_u32:
1359                return &bpf_get_prandom_u32_proto;
1360        case BPF_FUNC_get_smp_processor_id:
1361                return &bpf_get_raw_smp_processor_id_proto;
1362        case BPF_FUNC_get_numa_node_id:
1363                return &bpf_get_numa_node_id_proto;
1364        case BPF_FUNC_tail_call:
1365                return &bpf_tail_call_proto;
1366        case BPF_FUNC_ktime_get_ns:
1367                return &bpf_ktime_get_ns_proto;
1368        case BPF_FUNC_ktime_get_boot_ns:
1369                return &bpf_ktime_get_boot_ns_proto;
1370        case BPF_FUNC_ktime_get_coarse_ns:
1371                return &bpf_ktime_get_coarse_ns_proto;
1372        case BPF_FUNC_ringbuf_output:
1373                return &bpf_ringbuf_output_proto;
1374        case BPF_FUNC_ringbuf_reserve:
1375                return &bpf_ringbuf_reserve_proto;
1376        case BPF_FUNC_ringbuf_submit:
1377                return &bpf_ringbuf_submit_proto;
1378        case BPF_FUNC_ringbuf_discard:
1379                return &bpf_ringbuf_discard_proto;
1380        case BPF_FUNC_ringbuf_query:
1381                return &bpf_ringbuf_query_proto;
1382        case BPF_FUNC_for_each_map_elem:
1383                return &bpf_for_each_map_elem_proto;
1384        default:
1385                break;
1386        }
1387
1388        if (!bpf_capable())
1389                return NULL;
1390
1391        switch (func_id) {
1392        case BPF_FUNC_spin_lock:
1393                return &bpf_spin_lock_proto;
1394        case BPF_FUNC_spin_unlock:
1395                return &bpf_spin_unlock_proto;
1396        case BPF_FUNC_jiffies64:
1397                return &bpf_jiffies64_proto;
1398        case BPF_FUNC_per_cpu_ptr:
1399                return &bpf_per_cpu_ptr_proto;
1400        case BPF_FUNC_this_cpu_ptr:
1401                return &bpf_this_cpu_ptr_proto;
1402        case BPF_FUNC_timer_init:
1403                return &bpf_timer_init_proto;
1404        case BPF_FUNC_timer_set_callback:
1405                return &bpf_timer_set_callback_proto;
1406        case BPF_FUNC_timer_start:
1407                return &bpf_timer_start_proto;
1408        case BPF_FUNC_timer_cancel:
1409                return &bpf_timer_cancel_proto;
1410        default:
1411                break;
1412        }
1413
1414        if (!perfmon_capable())
1415                return NULL;
1416
1417        switch (func_id) {
1418        case BPF_FUNC_trace_printk:
1419                return bpf_get_trace_printk_proto();
1420        case BPF_FUNC_get_current_task:
1421                return &bpf_get_current_task_proto;
1422        case BPF_FUNC_get_current_task_btf:
1423                return &bpf_get_current_task_btf_proto;
1424        case BPF_FUNC_probe_read_user:
1425                return &bpf_probe_read_user_proto;
1426        case BPF_FUNC_probe_read_kernel:
1427                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1428                       NULL : &bpf_probe_read_kernel_proto;
1429        case BPF_FUNC_probe_read_user_str:
1430                return &bpf_probe_read_user_str_proto;
1431        case BPF_FUNC_probe_read_kernel_str:
1432                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1433                       NULL : &bpf_probe_read_kernel_str_proto;
1434        case BPF_FUNC_snprintf_btf:
1435                return &bpf_snprintf_btf_proto;
1436        case BPF_FUNC_snprintf:
1437                return &bpf_snprintf_proto;
1438        case BPF_FUNC_task_pt_regs:
1439                return &bpf_task_pt_regs_proto;
1440        default:
1441                return NULL;
1442        }
1443}
1444